From 897bf339a37378b0254dbfb610c8a6613726c25e Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Wed, 13 Nov 2019 19:05:40 +0300 Subject: [PATCH 01/21] Add fastParseToMicros and tests --- .../sql/catalyst/util/DateTimeUtils.scala | 35 ++++++++++++++++++- .../catalyst/util/DateTimeUtilsSuite.scala | 22 ++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 81d7274607ac..9b9e0f2a25e9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -19,13 +19,15 @@ package org.apache.spark.sql.catalyst.util import java.sql.{Date, Timestamp} import java.text.{DateFormat, SimpleDateFormat} -import java.util.{Calendar, Locale, TimeZone} +import java.util.{Calendar, GregorianCalendar, Locale, TimeZone} import java.util.concurrent.ConcurrentHashMap import java.util.function.{Function => JFunction} import javax.xml.bind.DatatypeConverter import scala.annotation.tailrec +import org.apache.commons.lang3.time.FastDateFormat + import org.apache.spark.unsafe.types.UTF8String /** @@ -1147,4 +1149,35 @@ object DateTimeUtils { threadLocalTimestampFormat.remove() threadLocalDateFormat.remove() } + + class MicrosCalendar(timeZone: TimeZone, locale: Locale) + extends GregorianCalendar(timeZone, locale) { + def getMicros(): SQLTimestamp = { + var fraction = fields(Calendar.MILLISECOND) + if (fraction < MICROS_PER_MILLIS) { + fraction *= MICROS_PER_MILLIS.toInt + } else if (fraction >= MICROS_PER_SECOND) { + do { + fraction /= 10 + } while (fraction >= MICROS_PER_SECOND) + } + fraction + } + } + + def fastParseToMicros( + parser: FastDateFormat, + s: String, + timeZone: TimeZone, + locale: Locale): SQLTimestamp = { + val pos = new java.text.ParsePosition(0) + val cal = new MicrosCalendar(timeZone, locale) + cal.clear() + if (!parser.parse(s, pos, cal)) { + throw new IllegalArgumentException(s) + } + val micros = cal.getMicros() + cal.set(Calendar.MILLISECOND, 0) + cal.getTimeInMillis * MICROS_PER_MILLIS + micros + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 242366839223..352fd9560b50 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -21,6 +21,8 @@ import java.sql.{Date, Timestamp} import java.text.SimpleDateFormat import java.util.{Calendar, Locale, TimeZone} +import org.apache.commons.lang3.time.FastDateFormat + import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.unsafe.types.UTF8String @@ -680,4 +682,24 @@ class DateTimeUtilsSuite extends SparkFunSuite { } } } + + test("fast parse to micros") { + val locale = Locale.US + val timeZone = TimeZoneUTC + def check(pattern: String, input: String, reference: String): Unit = { + val parser = FastDateFormat.getInstance(pattern, timeZone, locale) + val expected = DateTimeUtils.stringToTimestamp( + UTF8String.fromString(reference), timeZone).get + val actual = fastParseToMicros(parser, input, timeZone, locale) + assert(actual === expected) + } + check("yyyy-MM-dd'T'HH:mm:ss.SSSSSSSXXX", + 
"2019-10-14T09:39:07.3220000Z", "2019-10-14T09:39:07.322Z") + check("yyyy-MM-dd'T'HH:mm:ss.SSSXXX", + "2019-10-14T09:39:07.322Z", "2019-10-14T09:39:07.322Z") + check("yyyy-MM-dd'T'HH:mm:ss.SSSSSSXXX", + "2019-10-14T09:39:07.123456Z", "2019-10-14T09:39:07.123456Z") + check("yyyy-MM-dd'T'HH:mm:ss.SSSXXX", + "2019-10-14T09:39:07.123Z", "2019-10-14T09:39:07.123Z") + } } From b18c61aea9b2cd36d157aae8d4bb9039fe838c52 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Wed, 13 Nov 2019 19:14:09 +0300 Subject: [PATCH 02/21] Use fastParseToMicros in JacksonParser --- .../spark/sql/catalyst/json/JacksonParser.scala | 3 ++- .../spark/sql/catalyst/util/DateTimeUtils.scala | 11 +++-------- .../spark/sql/catalyst/util/DateTimeUtilsSuite.scala | 5 ++--- 3 files changed, 7 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index 984979ac5e9b..8d306ad1b0df 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -29,6 +29,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util._ +import org.apache.spark.sql.catalyst.util.DateTimeUtils.fastParseToMicros import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils @@ -216,7 +217,7 @@ class JacksonParser( // This one will lose microseconds parts. // See https://issues.apache.org/jira/browse/SPARK-10681. Long.box { - Try(options.timestampFormat.parse(stringValue).getTime * 1000L) + Try(fastParseToMicros(options.timestampFormat, stringValue, options.timeZone)) .getOrElse { // If it fails to parse, then tries the way used in 2.0 and 1.x for backwards // compatibility. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 9b9e0f2a25e9..6c20d4be7dfa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -1150,8 +1150,7 @@ object DateTimeUtils { threadLocalDateFormat.remove() } - class MicrosCalendar(timeZone: TimeZone, locale: Locale) - extends GregorianCalendar(timeZone, locale) { + class MicrosCalendar(tz: TimeZone) extends GregorianCalendar(tz, Locale.US) { def getMicros(): SQLTimestamp = { var fraction = fields(Calendar.MILLISECOND) if (fraction < MICROS_PER_MILLIS) { @@ -1165,13 +1164,9 @@ object DateTimeUtils { } } - def fastParseToMicros( - parser: FastDateFormat, - s: String, - timeZone: TimeZone, - locale: Locale): SQLTimestamp = { + def fastParseToMicros(parser: FastDateFormat, s: String, tz: TimeZone): SQLTimestamp = { val pos = new java.text.ParsePosition(0) - val cal = new MicrosCalendar(timeZone, locale) + val cal = new MicrosCalendar(tz) cal.clear() if (!parser.parse(s, pos, cal)) { throw new IllegalArgumentException(s) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 352fd9560b50..6f502c2fa2c1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -684,13 +684,12 @@ class DateTimeUtilsSuite extends SparkFunSuite { } test("fast parse to micros") { - val locale = Locale.US val timeZone = TimeZoneUTC def check(pattern: String, input: String, reference: String): Unit = { - val parser = FastDateFormat.getInstance(pattern, timeZone, locale) + val parser = FastDateFormat.getInstance(pattern, timeZone, Locale.US) val expected = DateTimeUtils.stringToTimestamp( UTF8String.fromString(reference), timeZone).get - val actual = fastParseToMicros(parser, input, timeZone, locale) + val actual = fastParseToMicros(parser, input, timeZone) assert(actual === expected) } check("yyyy-MM-dd'T'HH:mm:ss.SSSSSSSXXX", From 915a755a38ad7a953966dfdfeda452d35a96b631 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Wed, 13 Nov 2019 19:30:40 +0300 Subject: [PATCH 03/21] Add a test for from_json() --- .../org/apache/spark/sql/JsonFunctionsSuite.scala | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 53ae1e0249e6..35087ce5ec5c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -518,4 +518,14 @@ class JsonFunctionsSuite extends QueryTest with SharedSQLContext { jsonDF.select(to_json(from_json($"a", schema))), Seq(Row(json))) } + + test("from_json - timestamp in micros") { + val df = Seq("""{"time": "1970-01-01T00:00:00.123456"}""").toDS() + val schema = new StructType().add("time", TimestampType) + val options = Map("timestampFormat" -> "yyyy-MM-dd'T'HH:mm:ss.SSSSSS") + + checkAnswer( + df.select(from_json($"value", schema, options)), + Row(Row(java.sql.Timestamp.valueOf("1970-01-01 00:00:00.123456")))) + } } From 453ee5c65e819cc79c9b5173483802f5e5fcf3b8 Mon Sep 
17 00:00:00 2001 From: Maxim Gekk Date: Thu, 14 Nov 2019 00:06:01 +0300 Subject: [PATCH 04/21] Accept only SSS or SSSSSS or without fraction --- .../sql/catalyst/util/DateTimeUtils.scala | 19 +++++++++---------- .../catalyst/util/DateTimeUtilsSuite.scala | 8 ++++---- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index fa4d938a81b8..9bcc64d3e6cf 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -1168,16 +1168,15 @@ object DateTimeUtils { } class MicrosCalendar(tz: TimeZone) extends GregorianCalendar(tz, Locale.US) { - def getMicros(): SQLTimestamp = { - var fraction = fields(Calendar.MILLISECOND) - if (fraction < MICROS_PER_MILLIS) { - fraction *= MICROS_PER_MILLIS.toInt - } else if (fraction >= MICROS_PER_SECOND) { - do { - fraction /= 10 - } while (fraction >= MICROS_PER_SECOND) + def getMicros(digitsInFraction: Int): SQLTimestamp = { + digitsInFraction match { + case 0 => 0 + case 3 => fields(Calendar.MILLISECOND) * MICROS_PER_MILLIS + case 6 => fields(Calendar.MILLISECOND) + case _ => + throw new IllegalArgumentException( + s"Supported only 0, 3 or 6 digits in the second fraction but got ${digitsInFraction}") } - fraction } } @@ -1188,7 +1187,7 @@ object DateTimeUtils { if (!parser.parse(s, pos, cal)) { throw new IllegalArgumentException(s) } - val micros = cal.getMicros() + val micros = cal.getMicros(parser.getPattern.count(_ == 'S')) cal.set(Calendar.MILLISECOND, 0) cal.getTimeInMillis * MICROS_PER_MILLIS + micros } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 20a03ddbdfbf..af5ed6973678 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -704,13 +704,13 @@ class DateTimeUtilsSuite extends SparkFunSuite { val actual = fastParseToMicros(parser, input, timeZone) assert(actual === expected) } - check("yyyy-MM-dd'T'HH:mm:ss.SSSSSSSXXX", - "2019-10-14T09:39:07.3220000Z", "2019-10-14T09:39:07.322Z") + check("yyyy-MM-dd'T'HH:mm:ss.SSSSSSXXX", + "2019-10-14T09:39:07.322000Z", "2019-10-14T09:39:07.322Z") check("yyyy-MM-dd'T'HH:mm:ss.SSSXXX", "2019-10-14T09:39:07.322Z", "2019-10-14T09:39:07.322Z") check("yyyy-MM-dd'T'HH:mm:ss.SSSSSSXXX", "2019-10-14T09:39:07.123456Z", "2019-10-14T09:39:07.123456Z") - check("yyyy-MM-dd'T'HH:mm:ss.SSSXXX", - "2019-10-14T09:39:07.123Z", "2019-10-14T09:39:07.123Z") + check("yyyy-MM-dd'T'HH:mm:ss.SSSSSSXXX", + "2019-10-14T09:39:07.000010Z", "2019-10-14T09:39:07.000010Z") } } From 86ac2b2b0a81aa44b9b9a9133149db15a18dc7cc Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 14 Nov 2019 02:07:28 +0300 Subject: [PATCH 05/21] Generic approach --- .../spark/sql/catalyst/util/DateTimeUtils.scala | 11 +++-------- .../scala/org/apache/spark/sql/types/Decimal.scala | 2 +- .../spark/sql/catalyst/util/DateTimeUtilsSuite.scala | 10 +++++++--- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala 
index 9bcc64d3e6cf..3fba05896930 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -29,6 +29,7 @@ import scala.annotation.tailrec import org.apache.commons.lang3.time.FastDateFormat +import org.apache.spark.sql.types.Decimal import org.apache.spark.unsafe.types.UTF8String /** @@ -1169,14 +1170,8 @@ object DateTimeUtils { class MicrosCalendar(tz: TimeZone) extends GregorianCalendar(tz, Locale.US) { def getMicros(digitsInFraction: Int): SQLTimestamp = { - digitsInFraction match { - case 0 => 0 - case 3 => fields(Calendar.MILLISECOND) * MICROS_PER_MILLIS - case 6 => fields(Calendar.MILLISECOND) - case _ => - throw new IllegalArgumentException( - s"Supported only 0, 3 or 6 digits in the second fraction but got ${digitsInFraction}") - } + val d = fields(Calendar.MILLISECOND) * MICROS_PER_SECOND + d / Decimal.POW_10(digitsInFraction) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala index 121823249a7f..0da28c403816 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala @@ -432,7 +432,7 @@ object Decimal { /** Maximum number of decimal digits a Long can represent */ val MAX_LONG_DIGITS = 18 - private val POW_10 = Array.tabulate[Long](MAX_LONG_DIGITS + 1)(i => math.pow(10, i).toLong) + val POW_10 = Array.tabulate[Long](MAX_LONG_DIGITS + 1)(i => math.pow(10, i).toLong) private val BIG_DEC_ZERO = BigDecimal(0) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index af5ed6973678..19e810d24b0d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -704,13 +704,17 @@ class DateTimeUtilsSuite extends SparkFunSuite { val actual = fastParseToMicros(parser, input, timeZone) assert(actual === expected) } + check("yyyy-MM-dd'T'HH:mm:ss.SSSSSSSXXX", + "2019-10-14T09:39:07.3220000Z", "2019-10-14T09:39:07.322Z") check("yyyy-MM-dd'T'HH:mm:ss.SSSSSSXXX", "2019-10-14T09:39:07.322000Z", "2019-10-14T09:39:07.322Z") - check("yyyy-MM-dd'T'HH:mm:ss.SSSXXX", + check("yyyy-MM-dd'T'HH:mm:ss.SSSX", "2019-10-14T09:39:07.322Z", "2019-10-14T09:39:07.322Z") - check("yyyy-MM-dd'T'HH:mm:ss.SSSSSSXXX", + check("yyyy-MM-dd'T'HH:mm:ss.SSSSSSX", "2019-10-14T09:39:07.123456Z", "2019-10-14T09:39:07.123456Z") - check("yyyy-MM-dd'T'HH:mm:ss.SSSSSSXXX", + check("yyyy-MM-dd'T'HH:mm:ss.SSSSSSX", "2019-10-14T09:39:07.000010Z", "2019-10-14T09:39:07.000010Z") + check("yyyy-MM-dd'T'HH:mm:ss.SX", + "2019-10-14T09:39:07.1Z", "2019-10-14T09:39:07.1Z") } } From 9c825382443cf12fc8e475d48f09b12f40b76ef0 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 14 Nov 2019 14:17:34 +0300 Subject: [PATCH 06/21] Make MicrosCalendar private --- .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 3fba05896930..5765e5c6bcbb 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -1168,7 +1168,7 @@ object DateTimeUtils { threadLocalDateFormat.remove() } - class MicrosCalendar(tz: TimeZone) extends GregorianCalendar(tz, Locale.US) { + private class MicrosCalendar(tz: TimeZone) extends GregorianCalendar(tz, Locale.US) { def getMicros(digitsInFraction: Int): SQLTimestamp = { val d = fields(Calendar.MILLISECOND) * MICROS_PER_SECOND d / Decimal.POW_10(digitsInFraction) From 9a446aac19af47cacd9b3b87a0ba4dd3e813b7c1 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 14 Nov 2019 14:46:59 +0300 Subject: [PATCH 07/21] Optimizations: produce less garbage --- .../sql/catalyst/json/JacksonParser.scala | 6 ++-- .../sql/catalyst/util/DateTimeUtils.scala | 31 +++++++++++-------- .../catalyst/util/DateTimeUtilsSuite.scala | 8 +++-- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index 8d306ad1b0df..aa850dbd43a8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -29,7 +29,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util._ -import org.apache.spark.sql.catalyst.util.DateTimeUtils.fastParseToMicros +import org.apache.spark.sql.catalyst.util.DateTimeUtils.getDateTimeParser import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils @@ -54,6 +54,8 @@ class JacksonParser( private val factory = new JsonFactory() options.setJacksonOptions(factory) + private val dateTimeParser = getDateTimeParser(options.dateFormat, options.timeZone) + /** * Create a converter which converts the JSON documents held by the `JsonParser` * to a value according to a desired schema. This is a wrapper for the method @@ -217,7 +219,7 @@ class JacksonParser( // This one will lose microseconds parts. // See https://issues.apache.org/jira/browse/SPARK-10681. Long.box { - Try(fastParseToMicros(options.timestampFormat, stringValue, options.timeZone)) + Try(dateTimeParser.parse(stringValue)) .getOrElse { // If it fails to parse, then tries the way used in 2.0 and 1.x for backwards // compatibility. 
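The point of this commit is that JacksonParser now builds one parser up front and calls it once per record, instead of allocating a FastDateFormat result (a boxed java.util.Date) and, in the new code path, a fresh MicrosCalendar for every value. A usage sketch of the reuse pattern, assuming the catalyst classes from this patch are on the classpath (the pattern string and input are only illustrative):

import java.util.{Locale, TimeZone}
import org.apache.commons.lang3.time.FastDateFormat
import org.apache.spark.sql.catalyst.util.DateTimeUtils

object ParserReuseSketch {
  def main(args: Array[String]): Unit = {
    val tz = TimeZone.getTimeZone("UTC")
    val format = FastDateFormat.getInstance("yyyy-MM-dd'T'HH:mm:ss.SSSSSSX", tz, Locale.US)
    // Allocated once per task: the format, the precomputed digit count and
    // the MicrosCalendar are created here, not once per parsed record.
    val parser = DateTimeUtils.getDateTimeParser(format, tz)
    // parse() returns microseconds since the epoch (SQLTimestamp = Long).
    println(parser.parse("2019-10-14T09:39:07.123456Z"))
  }
}

One caveat at this point in the series: DateTimeParser also caches a single ParsePosition, which FastDateFormat.parse advances as it consumes input, so a second parse() call on the same instance would start at the wrong index. The "Bug fixes" commit below addresses this by allocating a fresh ParsePosition on every call.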
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 5765e5c6bcbb..7c70325a0dd6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -18,17 +18,16 @@ package org.apache.spark.sql.catalyst.util import java.sql.{Date, Timestamp} -import java.text.{DateFormat, SimpleDateFormat} +import java.text.{DateFormat, ParsePosition, SimpleDateFormat} import java.time.Instant import java.util.{Calendar, GregorianCalendar, Locale, TimeZone} import java.util.concurrent.ConcurrentHashMap import java.util.function.{Function => JFunction} + import javax.xml.bind.DatatypeConverter import scala.annotation.tailrec - import org.apache.commons.lang3.time.FastDateFormat - import org.apache.spark.sql.types.Decimal import org.apache.spark.unsafe.types.UTF8String @@ -1168,22 +1167,28 @@ object DateTimeUtils { threadLocalDateFormat.remove() } - private class MicrosCalendar(tz: TimeZone) extends GregorianCalendar(tz, Locale.US) { + class MicrosCalendar(tz: TimeZone) extends GregorianCalendar(tz, Locale.US) { def getMicros(digitsInFraction: Int): SQLTimestamp = { val d = fields(Calendar.MILLISECOND) * MICROS_PER_SECOND d / Decimal.POW_10(digitsInFraction) } } - def fastParseToMicros(parser: FastDateFormat, s: String, tz: TimeZone): SQLTimestamp = { - val pos = new java.text.ParsePosition(0) - val cal = new MicrosCalendar(tz) - cal.clear() - if (!parser.parse(s, pos, cal)) { - throw new IllegalArgumentException(s) + class DateTimeParser(format: FastDateFormat, digitsInFraction: Int, cal: MicrosCalendar) { + private val startPos = new ParsePosition(0) + + def parse(s: String): SQLTimestamp = { + cal.clear() + if (!format.parse(s, startPos, cal)) { + throw new IllegalArgumentException(s) + } + val micros = cal.getMicros(digitsInFraction) + cal.set(Calendar.MILLISECOND, 0) + cal.getTimeInMillis * MICROS_PER_MILLIS + micros } - val micros = cal.getMicros(parser.getPattern.count(_ == 'S')) - cal.set(Calendar.MILLISECOND, 0) - cal.getTimeInMillis * MICROS_PER_MILLIS + micros + } + + def getDateTimeParser(format: FastDateFormat, tz: TimeZone): DateTimeParser = { + new DateTimeParser(format, format.getPattern.count(_ == 'S'), new MicrosCalendar(tz)) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 19e810d24b0d..463a367ec87e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -698,10 +698,12 @@ class DateTimeUtilsSuite extends SparkFunSuite { test("fast parse to micros") { val timeZone = TimeZoneUTC def check(pattern: String, input: String, reference: String): Unit = { - val parser = FastDateFormat.getInstance(pattern, timeZone, Locale.US) + val parser = getDateTimeParser( + FastDateFormat.getInstance(pattern, timeZone, Locale.US), + timeZone) val expected = DateTimeUtils.stringToTimestamp( UTF8String.fromString(reference), timeZone).get - val actual = fastParseToMicros(parser, input, timeZone) + val actual = parser.parse(input) assert(actual === expected) } check("yyyy-MM-dd'T'HH:mm:ss.SSSSSSSXXX", @@ -716,5 +718,7 @@ class DateTimeUtilsSuite extends SparkFunSuite { 
"2019-10-14T09:39:07.000010Z", "2019-10-14T09:39:07.000010Z") check("yyyy-MM-dd'T'HH:mm:ss.SX", "2019-10-14T09:39:07.1Z", "2019-10-14T09:39:07.1Z") + check("yyyy-MM-dd'T'HH:mm:ss.SSX", + "2019-10-14T09:39:07.10Z", "2019-10-14T09:39:07.1Z") } } From ef8284c77809f3222a2a417f51898480fdc3f031 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 14 Nov 2019 14:48:38 +0300 Subject: [PATCH 08/21] Fix imports --- .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 7c70325a0dd6..edf661ef19fa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -23,11 +23,12 @@ import java.time.Instant import java.util.{Calendar, GregorianCalendar, Locale, TimeZone} import java.util.concurrent.ConcurrentHashMap import java.util.function.{Function => JFunction} - import javax.xml.bind.DatatypeConverter import scala.annotation.tailrec + import org.apache.commons.lang3.time.FastDateFormat + import org.apache.spark.sql.types.Decimal import org.apache.spark.unsafe.types.UTF8String From 638c640f7821b194c209b1f888fc1183e65faba2 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 14 Nov 2019 15:37:30 +0300 Subject: [PATCH 09/21] Bug fixes --- .../spark/sql/catalyst/json/JacksonParser.scala | 16 +++++++--------- .../spark/sql/catalyst/util/DateTimeUtils.scala | 4 +--- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index aa850dbd43a8..d310a7292ec4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -54,7 +54,8 @@ class JacksonParser( private val factory = new JsonFactory() options.setJacksonOptions(factory) - private val dateTimeParser = getDateTimeParser(options.dateFormat, options.timeZone) + @transient private lazy val dateTimeParser = + getDateTimeParser(options.timestampFormat, options.timeZone) /** * Create a converter which converts the JSON documents held by the `JsonParser` @@ -216,15 +217,12 @@ class JacksonParser( (parser: JsonParser) => parseJsonToken[java.lang.Long](parser, dataType) { case VALUE_STRING => val stringValue = parser.getText - // This one will lose microseconds parts. - // See https://issues.apache.org/jira/browse/SPARK-10681. Long.box { - Try(dateTimeParser.parse(stringValue)) - .getOrElse { - // If it fails to parse, then tries the way used in 2.0 and 1.x for backwards - // compatibility. - DateTimeUtils.stringToTime(stringValue).getTime * 1000L - } + Try(dateTimeParser.parse(stringValue)).getOrElse { + // If it fails to parse, then tries the way used in 2.0 and 1.x for backwards + // compatibility. 
+ DateTimeUtils.stringToTime(stringValue).getTime * 1000L + } } case VALUE_NUMBER_INT => diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index edf661ef19fa..29144194ffcb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -1176,11 +1176,9 @@ object DateTimeUtils { } class DateTimeParser(format: FastDateFormat, digitsInFraction: Int, cal: MicrosCalendar) { - private val startPos = new ParsePosition(0) - def parse(s: String): SQLTimestamp = { cal.clear() - if (!format.parse(s, startPos, cal)) { + if (!format.parse(s, new ParsePosition(0), cal)) { throw new IllegalArgumentException(s) } val micros = cal.getMicros(digitsInFraction) From eb58d035ff7badda4074a88f5b843b55e4c54683 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 14 Nov 2019 16:22:07 +0300 Subject: [PATCH 10/21] Improve error message --- .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 29144194ffcb..505fa67ce711 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -1179,7 +1179,7 @@ object DateTimeUtils { def parse(s: String): SQLTimestamp = { cal.clear() if (!format.parse(s, new ParsePosition(0), cal)) { - throw new IllegalArgumentException(s) + throw new IllegalArgumentException(s"'$s' is an invalid timestamp") } val micros = cal.getMicros(digitsInFraction) cal.set(Calendar.MILLISECOND, 0) From 274f25ab7f981a79788aa6d81e8178aecf79f8e2 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 14 Nov 2019 16:23:50 +0300 Subject: [PATCH 11/21] dateTimeParser -> timestampParser --- .../org/apache/spark/sql/catalyst/json/JacksonParser.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index d310a7292ec4..8711900f2b36 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -54,7 +54,7 @@ class JacksonParser( private val factory = new JsonFactory() options.setJacksonOptions(factory) - @transient private lazy val dateTimeParser = + @transient private lazy val timestampParser = getDateTimeParser(options.timestampFormat, options.timeZone) /** @@ -218,7 +218,7 @@ class JacksonParser( case VALUE_STRING => val stringValue = parser.getText Long.box { - Try(dateTimeParser.parse(stringValue)).getOrElse { + Try(timestampParser.parse(stringValue)).getOrElse { // If it fails to parse, then tries the way used in 2.0 and 1.x for backwards // compatibility. 
DateTimeUtils.stringToTime(stringValue).getTime * 1000L From 66d1100601a3ffdf5cfa0f50315df0c313c719d4 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 14 Nov 2019 16:44:48 +0300 Subject: [PATCH 12/21] Fix CSV as well --- .../datasources/csv/UnivocityParser.scala | 9 +-- .../csv/UnivocityParserSuite.scala | 59 ++++++++++--------- 2 files changed, 37 insertions(+), 31 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParser.scala index 69bd11f0ae3b..2c0a627c1ea2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParser.scala @@ -29,6 +29,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.{BadRecordException, DateTimeUtils} +import org.apache.spark.sql.catalyst.util.DateTimeUtils.getDateTimeParser import org.apache.spark.sql.execution.datasources.FailureSafeParser import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -77,6 +78,9 @@ class UnivocityParser( private val row = new GenericInternalRow(requiredSchema.length) + @transient private lazy val timestampParser = + getDateTimeParser(options.timestampFormat, options.timeZone) + // Retrieve the raw record string. private def getCurrentInput: UTF8String = { UTF8String.fromString(tokenizer.getContext.currentParsedContent().stripLineEnd) @@ -156,10 +160,7 @@ class UnivocityParser( case _: TimestampType => (d: String) => nullSafeDatum(d, name, nullable, options) { datum => - // This one will lose microseconds parts. - // See https://issues.apache.org/jira/browse/SPARK-10681. - Try(options.timestampFormat.parse(datum).getTime * 1000L) - .getOrElse { + Try(timestampParser.parse(datum)).getOrElse { // If it fails to parse, then tries the way used in 2.0 and 1.x for backwards // compatibility. 
DateTimeUtils.stringToTime(datum).getTime * 1000L diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParserSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParserSuite.scala index 458edb253fb3..96011cf1ee42 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParserSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParserSuite.scala @@ -26,9 +26,9 @@ import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String class UnivocityParserSuite extends SparkFunSuite { - private val parser = new UnivocityParser( - StructType(Seq.empty), - new CSVOptions(Map.empty[String, String], false, "GMT")) + private def getParser(options: CSVOptions) = { + new UnivocityParser(StructType(Seq.empty), options) + } private def assertNull(v: Any) = assert(v == null) @@ -40,8 +40,10 @@ class UnivocityParserSuite extends SparkFunSuite { stringValues.zip(decimalValues).foreach { case (strVal, decimalVal) => val decimalValue = new BigDecimal(decimalVal.toString) val options = new CSVOptions(Map.empty[String, String], false, "GMT") - assert(parser.makeConverter("_1", decimalType, options = options).apply(strVal) === - Decimal(decimalValue, decimalType.precision, decimalType.scale)) + assert( + getParser(options) + .makeConverter("_1", decimalType, options = options) + .apply(strVal) === Decimal(decimalValue, decimalType.precision, decimalType.scale)) } } @@ -53,13 +55,14 @@ class UnivocityParserSuite extends SparkFunSuite { types.foreach { t => // Tests that a custom nullValue. val nullValueOptions = new CSVOptions(Map("nullValue" -> "-"), false, "GMT") - val converter = - parser.makeConverter("_1", t, nullable = true, options = nullValueOptions) + val converter = getParser(nullValueOptions) + .makeConverter("_1", t, nullable = true, options = nullValueOptions) assertNull(converter.apply("-")) assertNull(converter.apply(null)) // Tests that the default nullValue is empty string. val options = new CSVOptions(Map.empty[String, String], false, "GMT") + val parser = getParser(options) assertNull(parser.makeConverter("_1", t, nullable = true, options = options).apply("")) } @@ -67,8 +70,8 @@ class UnivocityParserSuite extends SparkFunSuite { types.foreach { t => // Casts a null to not nullable field should throw an exception. val options = new CSVOptions(Map("nullValue" -> "-"), false, "GMT") - val converter = - parser.makeConverter("_1", t, nullable = false, options = options) + val converter = getParser(options) + .makeConverter("_1", t, nullable = false, options = options) var message = intercept[RuntimeException] { converter.apply("-") }.getMessage @@ -83,22 +86,25 @@ class UnivocityParserSuite extends SparkFunSuite { // null. 
Seq(true, false).foreach { b => val options = new CSVOptions(Map("nullValue" -> "null"), false, "GMT") - val converter = - parser.makeConverter("_1", StringType, nullable = b, options = options) + val converter = getParser(options) + .makeConverter("_1", StringType, nullable = b, options = options) assert(converter.apply("") == UTF8String.fromString("")) } } test("Throws exception for empty string with non null type") { - val options = new CSVOptions(Map.empty[String, String], false, "GMT") + val options = new CSVOptions(Map.empty[String, String], false, "GMT") val exception = intercept[RuntimeException]{ - parser.makeConverter("_1", IntegerType, nullable = false, options = options).apply("") + getParser(options) + .makeConverter("_1", IntegerType, nullable = false, options = options) + .apply("") } assert(exception.getMessage.contains("null value found but field _1 is not nullable.")) } test("Types are cast correctly") { val options = new CSVOptions(Map.empty[String, String], false, "GMT") + val parser = getParser(options) assert(parser.makeConverter("_1", ByteType, options = options).apply("10") == 10) assert(parser.makeConverter("_1", ShortType, options = options).apply("10") == 10) assert(parser.makeConverter("_1", IntegerType, options = options).apply("10") == 10) @@ -111,17 +117,17 @@ class UnivocityParserSuite extends SparkFunSuite { new CSVOptions(Map("timestampFormat" -> "dd/MM/yyyy hh:mm"), false, "GMT") val customTimestamp = "31/01/2015 00:00" val expectedTime = timestampsOptions.timestampFormat.parse(customTimestamp).getTime - val castedTimestamp = - parser.makeConverter("_1", TimestampType, nullable = true, options = timestampsOptions) - .apply(customTimestamp) + val castedTimestamp = getParser(timestampsOptions) + .makeConverter("_1", TimestampType, nullable = true, options = timestampsOptions) + .apply(customTimestamp) assert(castedTimestamp == expectedTime * 1000L) val customDate = "31/01/2015" val dateOptions = new CSVOptions(Map("dateFormat" -> "dd/MM/yyyy"), false, "GMT") val expectedDate = dateOptions.dateFormat.parse(customDate).getTime - val castedDate = - parser.makeConverter("_1", DateType, nullable = true, options = dateOptions) - .apply(customTimestamp) + val castedDate = getParser(dateOptions) + .makeConverter("_1", DateType, nullable = true, options = dateOptions) + .apply(customTimestamp) assert(castedDate == DateTimeUtils.millisToDays(expectedDate)) val timestamp = "2015-01-01 00:00:00" @@ -138,7 +144,7 @@ class UnivocityParserSuite extends SparkFunSuite { types.foreach { dt => input.foreach { v => val message = intercept[NumberFormatException] { - parser.makeConverter("_1", dt, options = options).apply(v) + getParser(options).makeConverter("_1", dt, options = options).apply(v) }.getMessage assert(message.contains(v)) } @@ -147,7 +153,7 @@ class UnivocityParserSuite extends SparkFunSuite { test("Float NaN values are parsed correctly") { val options = new CSVOptions(Map("nanValue" -> "nn"), false, "GMT") - val floatVal: Float = parser.makeConverter( + val floatVal: Float = getParser(options).makeConverter( "_1", FloatType, nullable = true, options = options ).apply("nn").asInstanceOf[Float] @@ -158,7 +164,7 @@ class UnivocityParserSuite extends SparkFunSuite { test("Double NaN values are parsed correctly") { val options = new CSVOptions(Map("nanValue" -> "-"), false, "GMT") - val doubleVal: Double = parser.makeConverter( + val doubleVal: Double = getParser(options).makeConverter( "_1", DoubleType, nullable = true, options = options 
).apply("-").asInstanceOf[Double] @@ -167,14 +173,14 @@ class UnivocityParserSuite extends SparkFunSuite { test("Float infinite values can be parsed") { val negativeInfOptions = new CSVOptions(Map("negativeInf" -> "max"), false, "GMT") - val floatVal1 = parser.makeConverter( + val floatVal1 = getParser(negativeInfOptions).makeConverter( "_1", FloatType, nullable = true, options = negativeInfOptions ).apply("max").asInstanceOf[Float] assert(floatVal1 == Float.NegativeInfinity) val positiveInfOptions = new CSVOptions(Map("positiveInf" -> "max"), false, "GMT") - val floatVal2 = parser.makeConverter( + val floatVal2 = getParser(positiveInfOptions).makeConverter( "_1", FloatType, nullable = true, options = positiveInfOptions ).apply("max").asInstanceOf[Float] @@ -183,18 +189,17 @@ class UnivocityParserSuite extends SparkFunSuite { test("Double infinite values can be parsed") { val negativeInfOptions = new CSVOptions(Map("negativeInf" -> "max"), false, "GMT") - val doubleVal1 = parser.makeConverter( + val doubleVal1 = getParser(negativeInfOptions).makeConverter( "_1", DoubleType, nullable = true, options = negativeInfOptions ).apply("max").asInstanceOf[Double] assert(doubleVal1 == Double.NegativeInfinity) val positiveInfOptions = new CSVOptions(Map("positiveInf" -> "max"), false, "GMT") - val doubleVal2 = parser.makeConverter( + val doubleVal2 = getParser(positiveInfOptions).makeConverter( "_1", DoubleType, nullable = true, options = positiveInfOptions ).apply("max").asInstanceOf[Double] assert(doubleVal2 == Double.PositiveInfinity) } - } From 69c1bb4c266a7e5c312b646f837d1f19388e3ed6 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 14 Nov 2019 20:26:53 +0300 Subject: [PATCH 13/21] Add a negative test --- .../catalyst/util/DateTimeUtilsSuite.scala | 58 +++++++++++-------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 463a367ec87e..8bf3bc3488c6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -696,29 +696,41 @@ class DateTimeUtilsSuite extends SparkFunSuite { } test("fast parse to micros") { - val timeZone = TimeZoneUTC - def check(pattern: String, input: String, reference: String): Unit = { - val parser = getDateTimeParser( - FastDateFormat.getInstance(pattern, timeZone, Locale.US), - timeZone) - val expected = DateTimeUtils.stringToTimestamp( - UTF8String.fromString(reference), timeZone).get - val actual = parser.parse(input) - assert(actual === expected) + DateTimeTestUtils.outstandingTimezones.foreach { timeZone => + def check(pattern: String, input: String, reference: String): Unit = { + val parser = getDateTimeParser( + FastDateFormat.getInstance(pattern, timeZone, Locale.US), + timeZone) + val expected = DateTimeUtils.stringToTimestamp( + UTF8String.fromString(reference), timeZone).get + val actual = parser.parse(input) + assert(actual === expected) + } + + check("yyyy-MM-dd'T'HH:mm:ss.SSSSSSSXXX", + "2019-10-14T09:39:07.3220000Z", "2019-10-14T09:39:07.322Z") + check("yyyy-MM-dd'T'HH:mm:ss.SSSSSS", + "2019-10-14T09:39:07.322000", "2019-10-14T09:39:07.322") + check("yyyy-MM-dd'T'HH:mm:ss.SSSX", + "2019-10-14T09:39:07.322Z", "2019-10-14T09:39:07.322Z") + check("yyyy-MM-dd'T'HH:mm:ss.SSSSSSX", + "2019-10-14T09:39:07.123456Z", 
"2019-10-14T09:39:07.123456Z") + check("yyyy-MM-dd'T'HH:mm:ss.SSSSSSX", + "2019-10-14T09:39:07.000010Z", "2019-10-14T09:39:07.000010Z") + check("yyyy-MM-dd'T'HH:mm:ss.S", + "2019-10-14T09:39:07.1", "2019-10-14T09:39:07.1") + check("yyyy-MM-dd'T'HH:mm:ss.SS", + "2019-10-14T09:39:07.10", "2019-10-14T09:39:07.1") + + try { + getDateTimeParser( + FastDateFormat.getInstance("yyyy/MM/dd HH_mm_ss.SSSSSS", timeZone, Locale.US), + timeZone).parse("2019/11/14 20#25#30.123456") + fail("Expected to throw an exception for the invalid input") + } catch { + case e: IllegalArgumentException => + assert(e.getMessage.contains("is an invalid timestamp")) + } } - check("yyyy-MM-dd'T'HH:mm:ss.SSSSSSSXXX", - "2019-10-14T09:39:07.3220000Z", "2019-10-14T09:39:07.322Z") - check("yyyy-MM-dd'T'HH:mm:ss.SSSSSSXXX", - "2019-10-14T09:39:07.322000Z", "2019-10-14T09:39:07.322Z") - check("yyyy-MM-dd'T'HH:mm:ss.SSSX", - "2019-10-14T09:39:07.322Z", "2019-10-14T09:39:07.322Z") - check("yyyy-MM-dd'T'HH:mm:ss.SSSSSSX", - "2019-10-14T09:39:07.123456Z", "2019-10-14T09:39:07.123456Z") - check("yyyy-MM-dd'T'HH:mm:ss.SSSSSSX", - "2019-10-14T09:39:07.000010Z", "2019-10-14T09:39:07.000010Z") - check("yyyy-MM-dd'T'HH:mm:ss.SX", - "2019-10-14T09:39:07.1Z", "2019-10-14T09:39:07.1Z") - check("yyyy-MM-dd'T'HH:mm:ss.SSX", - "2019-10-14T09:39:07.10Z", "2019-10-14T09:39:07.1Z") } } From 0bdb5238c9ae9799db45fed625f2e80f4c1d0091 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 14 Nov 2019 20:48:09 +0300 Subject: [PATCH 14/21] Add a CSV test --- .../sql/execution/datasources/csv/CSVSuite.scala | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index d714cb2433ad..95c9dc5b7467 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -1875,4 +1875,16 @@ class CSVSuite extends QueryTest with SharedSQLContext with SQLTestUtils with Te } } } + + test("parse timestamp in microsecond precision") { + withTempPath { path => + val t = "2019-11-14 20:35:30.123456" + Seq(t).toDF("t").write.text(path.getAbsolutePath) + val readback = spark.read + .schema("t timestamp") + .option("timestampFormat", "yyyy-MM-dd HH:mm:ss.SSSSSS") + .csv(path.getAbsolutePath) + checkAnswer(readback, Row(Timestamp.valueOf(t))) + } + } } From 7d9a19dbccac20a1e801b535b6b0d8350a018ed7 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Thu, 14 Nov 2019 23:27:31 +0300 Subject: [PATCH 15/21] Add comments --- .../sql/catalyst/util/DateTimeUtils.scala | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 505fa67ce711..8393546d3833 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -1168,16 +1168,37 @@ object DateTimeUtils { threadLocalDateFormat.remove() } + /** + * The custom sub-class of `GregorianCalendar` is needed to get access to + * the array of parsed `fields` immediately after parsing. We cannot use + * the `get()` method because it performs normalization of the fraction + * part. 
Accordingly, the `MILLISECOND` field doesn't contain the original value. + */ class MicrosCalendar(tz: TimeZone) extends GregorianCalendar(tz, Locale.US) { + // Converts parsed `MILLISECOND` field to seconds fraction in microsecond precision. + // For example if the fraction pattern is `SSSS` then `digitsInFraction` = 4, and + // if the `MILLISECOND` field was parsed to `1234`, the result is `123400` microseconds. def getMicros(digitsInFraction: Int): SQLTimestamp = { + // Append `digitsInFraction` zeros to the field: 1234 -> 1234000000 val d = fields(Calendar.MILLISECOND) * MICROS_PER_SECOND + // Take the first 6 digits from `d`: 1234000000 -> 123400 + // The rest contains exactly `digitsInFraction` trailing zeros: `0000` = 10 ^ digitsInFraction + // So, the result is `(1234 * 1000000) / (10 ^ digitsInFraction)` d / Decimal.POW_10(digitsInFraction) } } + /** + * An instance of the class is intended to be reused many times. It contains helper objects + * that can be reused between `parse()` invocations. + * @param format The parser itself. + * @param digitsInFraction The number of digits in the seconds fraction precalculated + * from the pattern. For `ss.SSSS`, it is 4. + * @param cal The calendar which can get microseconds from the second fraction. + */ class DateTimeParser(format: FastDateFormat, digitsInFraction: Int, cal: MicrosCalendar) { def parse(s: String): SQLTimestamp = { - cal.clear() + cal.clear() // Clear the calendar because it can be re-used many times if (!format.parse(s, new ParsePosition(0), cal)) { throw new IllegalArgumentException(s"'$s' is an invalid timestamp") } From 810a061e6ec97ee97f3a9c86a967bc12cf8c177d Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Fri, 15 Nov 2019 09:49:18 +0300 Subject: [PATCH 16/21] getDateTimeParser -> getTimestampParser --- .../org/apache/spark/sql/catalyst/json/JacksonParser.scala | 4 ++-- .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 2 +- .../apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala | 4 ++-- .../spark/sql/execution/datasources/csv/UnivocityParser.scala | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index 8711900f2b36..2fecc5d3c926 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -29,7 +29,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util._ -import org.apache.spark.sql.catalyst.util.DateTimeUtils.getDateTimeParser +import org.apache.spark.sql.catalyst.util.DateTimeUtils.getTimestampParser import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils @@ -55,7 +55,7 @@ class JacksonParser( options.setJacksonOptions(factory) @transient private lazy val timestampParser = - getDateTimeParser(options.timestampFormat, options.timeZone) + getTimestampParser(options.timestampFormat, options.timeZone) /** * Create a converter which converts the JSON documents held by the `JsonParser` diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 8393546d3833..a05125bb3938 100644 ---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -1208,7 +1208,7 @@ object DateTimeUtils { } } - def getDateTimeParser(format: FastDateFormat, tz: TimeZone): DateTimeParser = { + def getTimestampParser(format: FastDateFormat, tz: TimeZone): DateTimeParser = { new DateTimeParser(format, format.getPattern.count(_ == 'S'), new MicrosCalendar(tz)) } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 8bf3bc3488c6..53f5871c3527 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -698,7 +698,7 @@ class DateTimeUtilsSuite extends SparkFunSuite { test("fast parse to micros") { DateTimeTestUtils.outstandingTimezones.foreach { timeZone => def check(pattern: String, input: String, reference: String): Unit = { - val parser = getDateTimeParser( + val parser = getTimestampParser( FastDateFormat.getInstance(pattern, timeZone, Locale.US), timeZone) val expected = DateTimeUtils.stringToTimestamp( @@ -723,7 +723,7 @@ class DateTimeUtilsSuite extends SparkFunSuite { "2019-10-14T09:39:07.10", "2019-10-14T09:39:07.1") try { - getDateTimeParser( + getTimestampParser( FastDateFormat.getInstance("yyyy/MM/dd HH_mm_ss.SSSSSS", timeZone, Locale.US), timeZone).parse("2019/11/14 20#25#30.123456") fail("Expected to throw an exception for the invalid input") diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParser.scala index 2c0a627c1ea2..a61cf769444c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParser.scala @@ -29,7 +29,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.{BadRecordException, DateTimeUtils} -import org.apache.spark.sql.catalyst.util.DateTimeUtils.getDateTimeParser +import org.apache.spark.sql.catalyst.util.DateTimeUtils.getTimestampParser import org.apache.spark.sql.execution.datasources.FailureSafeParser import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -79,7 +79,7 @@ class UnivocityParser( private val row = new GenericInternalRow(requiredSchema.length) @transient private lazy val timestampParser = - getDateTimeParser(options.timestampFormat, options.timeZone) + getTimestampParser(options.timestampFormat, options.timeZone) // Retrieve the raw record string. 
private def getCurrentInput: UTF8String = { From 1a6608fb35903127ad96cf68b2f9e5d2dd181556 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Fri, 15 Nov 2019 09:50:43 +0300 Subject: [PATCH 17/21] Fix a comment --- .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index a05125bb3938..7bf7fe13d230 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -1179,7 +1179,7 @@ object DateTimeUtils { // For example if the fraction pattern is `SSSS` then `digitsInFraction` = 4, and // if the `MILLISECOND` field was parsed to `1234`, the result is `123400` microseconds. def getMicros(digitsInFraction: Int): SQLTimestamp = { - // Append `digitsInFraction` zeros to the field: 1234 -> 1234000000 + // Append 6 zeros to the field: 1234 -> 1234000000 val d = fields(Calendar.MILLISECOND) * MICROS_PER_SECOND // Take the first 6 digits from `d`: 1234000000 -> 123400 // The rest contains exactly `digitsInFraction` trailing zeros: `0000` = 10 ^ digitsInFraction From 31d78776cc1750ffdf3d741e9278b115abb85345 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Fri, 15 Nov 2019 09:52:43 +0300 Subject: [PATCH 18/21] the array of parsed `fields` -> protected `fields` --- .../org/apache/spark/sql/catalyst/util/DateTimeUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 7bf7fe13d230..ab0c021ee191 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -1170,7 +1170,7 @@ object DateTimeUtils { /** * The custom sub-class of `GregorianCalendar` is needed to get access to - * the array of parsed `fields` immediately after parsing. We cannot use + * protected `fields` immediately after parsing. We cannot use * the `get()` method because it performs normalization of the fraction * part. Accordingly, the `MILLISECOND` field doesn't contain the original value.
*/ From 8db95cae456f3ed8a496a383297082b7e2a10354 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Fri, 15 Nov 2019 09:55:23 +0300 Subject: [PATCH 19/21] Change expected value in a test --- .../org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 53f5871c3527..75ebc6c4d074 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -716,7 +716,7 @@ class DateTimeUtilsSuite extends SparkFunSuite { check("yyyy-MM-dd'T'HH:mm:ss.SSSSSSX", "2019-10-14T09:39:07.123456Z", "2019-10-14T09:39:07.123456Z") check("yyyy-MM-dd'T'HH:mm:ss.SSSSSSX", - "2019-10-14T09:39:07.000010Z", "2019-10-14T09:39:07.000010Z") + "2019-10-14T09:39:07.000010Z", "2019-10-14T09:39:07.00001Z") check("yyyy-MM-dd'T'HH:mm:ss.S", "2019-10-14T09:39:07.1", "2019-10-14T09:39:07.1") check("yyyy-MM-dd'T'HH:mm:ss.SS", "2019-10-14T09:39:07.10", "2019-10-14T09:39:07.1") From 18525117051143a4306bdcfcbfdd878f40983799 Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Fri, 15 Nov 2019 10:12:10 +0300 Subject: [PATCH 20/21] Put `digitsInFraction` and `cal` inside of `TimestampParser` --- .../spark/sql/catalyst/json/JacksonParser.scala | 5 ++--- .../spark/sql/catalyst/util/DateTimeUtils.scala | 17 ++++++----------- .../sql/catalyst/util/DateTimeUtilsSuite.scala | 10 ++++------ .../datasources/csv/UnivocityParser.scala | 5 ++--- 4 files changed, 14 insertions(+), 23 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index 2fecc5d3c926..616de86aa682 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -29,7 +29,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util._ -import org.apache.spark.sql.catalyst.util.DateTimeUtils.getTimestampParser +import org.apache.spark.sql.catalyst.util.DateTimeUtils.TimestampParser import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String import org.apache.spark.util.Utils @@ -54,8 +54,7 @@ class JacksonParser( private val factory = new JsonFactory() options.setJacksonOptions(factory) - @transient private lazy val timestampParser = - getTimestampParser(options.timestampFormat, options.timeZone) + @transient private lazy val timestampParser = new TimestampParser(options.timestampFormat) /** * Create a converter which converts the JSON documents held by the `JsonParser` diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index ab0c021ee191..d2bb5954914e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -1174,7 +1174,7 @@ object DateTimeUtils { * the `get()` method because it performs normalization of the fraction * part. Accordingly, the `MILLISECOND` field doesn't contain the original value.
*/ - class MicrosCalendar(tz: TimeZone) extends GregorianCalendar(tz, Locale.US) { + private class MicrosCalendar(tz: TimeZone) extends GregorianCalendar(tz, Locale.US) { // Converts parsed `MILLISECOND` field to seconds fraction in microsecond precision. // For example if the fraction pattern is `SSSS` then `digitsInFraction` = 4, and @@ -1190,13 +1190,12 @@ object DateTimeUtils { /** * An instance of the class is intended to be reused many times. It contains helper objects - * that can be reused between `parse()` invocations. - * @param format The parser itself. - * @param digitsInFraction The number of digits in the seconds fraction precalculated - * from the pattern. For `ss.SSSS`, it is 4. - * @param cal The calendar which can get microseconds from the second fraction. + * `cal` and `digitsInFraction` that are reused between `parse()` invocations. */ - class DateTimeParser(format: FastDateFormat, digitsInFraction: Int, cal: MicrosCalendar) { + class TimestampParser(format: FastDateFormat) { + private val digitsInFraction = format.getPattern.count(_ == 'S') + private val cal = new MicrosCalendar(format.getTimeZone) + def parse(s: String): SQLTimestamp = { cal.clear() // Clear the calendar because it can be re-used many times if (!format.parse(s, new ParsePosition(0), cal)) { throw new IllegalArgumentException(s"'$s' is an invalid timestamp") } @@ -1207,8 +1206,4 @@ object DateTimeUtils { cal.getTimeInMillis * MICROS_PER_MILLIS + micros } } - - def getTimestampParser(format: FastDateFormat, tz: TimeZone): DateTimeParser = { - new DateTimeParser(format, format.getPattern.count(_ == 'S'), new MicrosCalendar(tz)) - } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 75ebc6c4d074..835b56a1d75c 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -698,9 +698,7 @@ class DateTimeUtilsSuite extends SparkFunSuite { test("fast parse to micros") { DateTimeTestUtils.outstandingTimezones.foreach { timeZone => def check(pattern: String, input: String, reference: String): Unit = { - val parser = getTimestampParser( - FastDateFormat.getInstance(pattern, timeZone, Locale.US), - timeZone) + val parser = new TimestampParser(FastDateFormat.getInstance(pattern, timeZone, Locale.US)) val expected = DateTimeUtils.stringToTimestamp( UTF8String.fromString(reference), timeZone).get val actual = parser.parse(input) @@ -723,9 +721,9 @@ "2019-10-14T09:39:07.10", "2019-10-14T09:39:07.1") try { - getTimestampParser( - FastDateFormat.getInstance("yyyy/MM/dd HH_mm_ss.SSSSSS", timeZone, Locale.US), - timeZone).parse("2019/11/14 20#25#30.123456") + new TimestampParser( + FastDateFormat.getInstance("yyyy/MM/dd HH_mm_ss.SSSSSS", timeZone, Locale.US)) + .parse("2019/11/14 20#25#30.123456") fail("Expected to throw an exception for the invalid input") } catch { case e: IllegalArgumentException => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParser.scala index a61cf769444c..e847e408c7f1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParser.scala +++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/csv/UnivocityParser.scala @@ -29,7 +29,7 @@ import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.GenericInternalRow import org.apache.spark.sql.catalyst.util.{BadRecordException, DateTimeUtils} -import org.apache.spark.sql.catalyst.util.DateTimeUtils.getTimestampParser +import org.apache.spark.sql.catalyst.util.DateTimeUtils.TimestampParser import org.apache.spark.sql.execution.datasources.FailureSafeParser import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -78,8 +78,7 @@ class UnivocityParser( private val row = new GenericInternalRow(requiredSchema.length) - @transient private lazy val timestampParser = - getTimestampParser(options.timestampFormat, options.timeZone) + @transient private lazy val timestampParser = new TimestampParser(options.timestampFormat) // Retrieve the raw record string. private def getCurrentInput: UTF8String = { From 1cbd35b672c8e47d7d93859f0d494aeb8ac8fb8b Mon Sep 17 00:00:00 2001 From: Maxim Gekk Date: Fri, 15 Nov 2019 21:30:31 +0300 Subject: [PATCH 21/21] Rename a test --- .../org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 835b56a1d75c..ced003c6ef2e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -695,7 +695,7 @@ class DateTimeUtilsSuite extends SparkFunSuite { } } - test("fast parse to micros") { + test("parsing timestamp strings up to microsecond precision") { DateTimeTestUtils.outstandingTimezones.foreach { timeZone => def check(pattern: String, input: String, reference: String): Unit = { val parser = new TimestampParser(FastDateFormat.getInstance(pattern, timeZone, Locale.US))
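For reference, the arithmetic that the final MicrosCalendar.getMicros performs on the raw MILLISECOND field can be checked with plain numbers. A self-contained sketch (the constant and the power table are inlined stand-ins for DateTimeUtils.MICROS_PER_SECOND and Decimal.POW_10; the object name is invented for the example):

object GetMicrosArithmeticSketch {
  private val MICROS_PER_SECOND = 1000000L
  // Stand-in for Decimal.POW_10: powers of ten up to 10^18.
  private val POW_10: Array[Long] = Array.tabulate(19)(i => math.pow(10, i).toLong)

  // Mirrors the final MicrosCalendar.getMicros: `field` is the raw value that
  // FastDateFormat left in Calendar.MILLISECOND, and `digitsInFraction` is the
  // number of 'S' letters counted in the pattern.
  def getMicros(field: Long, digitsInFraction: Int): Long =
    field * MICROS_PER_SECOND / POW_10(digitsInFraction)

  def main(args: Array[String]): Unit = {
    assert(getMicros(1234L, 4) == 123400L) // pattern "SSSS", input ".1234"
    assert(getMicros(322L, 3) == 322000L)  // pattern "SSS", plain milliseconds
    assert(getMicros(10L, 6) == 10L)       // pattern "SSSSSS", input ".000010"
    assert(getMicros(1L, 1) == 100000L)    // pattern "S", input ".1"
  }
}

The complete result of TimestampParser.parse is then cal.getTimeInMillis * MICROS_PER_MILLIS + micros, with the MILLISECOND field zeroed out first so the fraction is not counted twice.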