Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class Iso8601DateFormatter(
extends DateFormatter with DateTimeFormatterHelper {

@transient
private lazy val formatter = getOrCreateFormatter(pattern, locale)
private lazy val formatter = getOrCreateFormatter(pattern, locale, isParsing)

@transient
private lazy val legacyFormatter = DateFormatter.getLegacyFormatter(
Expand Down Expand Up @@ -132,7 +132,7 @@ object DateFormatter {
zoneId: ZoneId,
locale: Locale = defaultLocale,
legacyFormat: LegacyDateFormat = LENIENT_SIMPLE_DATE_FORMAT,
isParsing: Boolean = true): DateFormatter = {
isParsing: Boolean): DateFormatter = {
val pattern = format.getOrElse(defaultPattern)
if (SQLConf.get.legacyTimeParserPolicy == LEGACY) {
getLegacyFormatter(pattern, zoneId, locale, legacyFormat)
Expand Down Expand Up @@ -166,10 +166,10 @@ object DateFormatter {
}

def apply(format: String, zoneId: ZoneId): DateFormatter = {
getFormatter(Some(format), zoneId)
getFormatter(Some(format), zoneId, isParsing = false)
}

def apply(zoneId: ZoneId): DateFormatter = {
getFormatter(None, zoneId)
getFormatter(None, zoneId, isParsing = false)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ trait DateTimeFormatterHelper {
protected def getOrCreateFormatter(
pattern: String,
locale: Locale,
isParsing: Boolean = false): DateTimeFormatter = {
isParsing: Boolean): DateTimeFormatter = {
val newPattern = convertIncompatiblePattern(pattern, isParsing)
val useVarLen = isParsing && newPattern.contains('S')
val key = (newPattern, locale, useVarLen)
Expand Down Expand Up @@ -252,7 +252,7 @@ private object DateTimeFormatterHelper {
* @param pattern The input pattern.
* @return The pattern for new parser
*/
def convertIncompatiblePattern(pattern: String, isParsing: Boolean = false): String = {
def convertIncompatiblePattern(pattern: String, isParsing: Boolean): String = {
val eraDesignatorContained = pattern.split("'").zipWithIndex.exists {
case (patternPart, index) =>
// Text can be quoted using single quotes, we only check the non-quote parts.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,11 @@ class Iso8601TimestampFormatter(
zoneId: ZoneId,
locale: Locale,
legacyFormat: LegacyDateFormat = LENIENT_SIMPLE_DATE_FORMAT,
needVarLengthSecondFraction: Boolean)
isParsing: Boolean)
extends TimestampFormatter with DateTimeFormatterHelper {
@transient
protected lazy val formatter: DateTimeFormatter =
getOrCreateFormatter(pattern, locale, needVarLengthSecondFraction)
getOrCreateFormatter(pattern, locale, isParsing)

@transient
protected lazy val legacyFormatter = TimestampFormatter.getLegacyFormatter(
Expand Down Expand Up @@ -122,7 +122,7 @@ class FractionTimestampFormatter(zoneId: ZoneId)
zoneId,
TimestampFormatter.defaultLocale,
LegacyDateFormats.FAST_DATE_FORMAT,
needVarLengthSecondFraction = false) {
isParsing = false) {

@transient
override protected lazy val formatter = DateTimeFormatterHelper.fractionFormatter
Expand Down Expand Up @@ -293,7 +293,7 @@ object TimestampFormatter {
zoneId: ZoneId,
locale: Locale = defaultLocale,
legacyFormat: LegacyDateFormat = LENIENT_SIMPLE_DATE_FORMAT,
isParsing: Boolean = false): TimestampFormatter = {
isParsing: Boolean): TimestampFormatter = {
val pattern = format.getOrElse(defaultPattern)
if (SQLConf.get.legacyTimeParserPolicy == LEGACY) {
getLegacyFormatter(pattern, zoneId, locale, legacyFormat)
Expand Down Expand Up @@ -340,12 +340,12 @@ object TimestampFormatter {
def apply(
format: String,
zoneId: ZoneId,
isParsing: Boolean = false): TimestampFormatter = {
isParsing: Boolean): TimestampFormatter = {
getFormatter(Some(format), zoneId, isParsing = isParsing)
}

def apply(zoneId: ZoneId): TimestampFormatter = {
getFormatter(None, zoneId)
getFormatter(None, zoneId, isParsing = false)
}

def getFractionFormatter(zoneId: ZoneId): TimestampFormatter = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
private val JST_OPT = Option(JST.getId)

def toMillis(timestamp: String): Long = {
val tf = TimestampFormatter("yyyy-MM-dd HH:mm:ss", UTC)
val tf = TimestampFormatter("yyyy-MM-dd HH:mm:ss", UTC, isParsing = false)
DateTimeUtils.microsToMillis(tf.parse(timestamp))
}
val date = "2015-04-08 13:10:15"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,43 +23,49 @@ import org.apache.spark.sql.catalyst.util.DateTimeFormatterHelper._
class DateTimeFormatterHelperSuite extends SparkFunSuite {

test("check incompatible pattern") {
assert(convertIncompatiblePattern("MM-DD-u") === "MM-DD-e")
assert(convertIncompatiblePattern("yyyy-MM-dd'T'HH:mm:ss.SSSz")
=== "uuuu-MM-dd'T'HH:mm:ss.SSSz")
assert(convertIncompatiblePattern("yyyy-MM'y contains in quoted text'HH:mm:ss")
=== "uuuu-MM'y contains in quoted text'HH:mm:ss")
assert(convertIncompatiblePattern("yyyy-MM-dd-u'T'HH:mm:ss.SSSz")
assert(convertIncompatiblePattern("MM-DD-u", isParsing = false) === "MM-DD-e")
assert(convertIncompatiblePattern("yyyy-MM-dd'T'HH:mm:ss.SSSz", isParsing = false) ===
"uuuu-MM-dd'T'HH:mm:ss.SSSz")
assert(convertIncompatiblePattern(
"yyyy-MM'y contains in quoted text'HH:mm:ss", isParsing = false) ===
"uuuu-MM'y contains in quoted text'HH:mm:ss")
assert(convertIncompatiblePattern("yyyy-MM-dd-u'T'HH:mm:ss.SSSz", isParsing = false)
=== "uuuu-MM-dd-e'T'HH:mm:ss.SSSz")
assert(convertIncompatiblePattern("yyyy-MM'u contains in quoted text'HH:mm:ss")
assert(
convertIncompatiblePattern("yyyy-MM'u contains in quoted text'HH:mm:ss", isParsing = false)
=== "uuuu-MM'u contains in quoted text'HH:mm:ss")
assert(convertIncompatiblePattern("yyyy-MM'u contains in quoted text'''''HH:mm:ss")
=== "uuuu-MM'u contains in quoted text'''''HH:mm:ss")
assert(convertIncompatiblePattern("yyyy-MM-dd'T'HH:mm:ss.SSSz G")
=== "yyyy-MM-dd'T'HH:mm:ss.SSSz G")
assert(convertIncompatiblePattern(
"yyyy-MM'u contains in quoted text'''''HH:mm:ss", isParsing = false) ===
"uuuu-MM'u contains in quoted text'''''HH:mm:ss")
assert(convertIncompatiblePattern("yyyy-MM-dd'T'HH:mm:ss.SSSz G", isParsing = false) ===
"yyyy-MM-dd'T'HH:mm:ss.SSSz G")
unsupportedLetters.foreach { l =>
val e = intercept[IllegalArgumentException](convertIncompatiblePattern(s"yyyy-MM-dd $l G"))
val e = intercept[IllegalArgumentException] {
convertIncompatiblePattern(s"yyyy-MM-dd $l G", isParsing = false)
}
assert(e.getMessage === s"Illegal pattern character: $l")
}
unsupportedLettersForParsing.foreach { l =>
val e = intercept[IllegalArgumentException] {
convertIncompatiblePattern(s"$l", isParsing = true)
}
assert(e.getMessage === s"Illegal pattern character: $l")
assert(convertIncompatiblePattern(s"$l").nonEmpty)
assert(convertIncompatiblePattern(s"$l", isParsing = false).nonEmpty)
}
unsupportedPatternLengths.foreach { style =>
val e1 = intercept[IllegalArgumentException] {
convertIncompatiblePattern(s"yyyy-MM-dd $style")
convertIncompatiblePattern(s"yyyy-MM-dd $style", isParsing = false)
}
assert(e1.getMessage === s"Too many pattern letters: ${style.head}")
val e2 = intercept[IllegalArgumentException] {
convertIncompatiblePattern(s"yyyy-MM-dd $style${style.head}")
convertIncompatiblePattern(s"yyyy-MM-dd $style${style.head}", isParsing = false)
}
assert(e2.getMessage === s"Too many pattern letters: ${style.head}")
}
assert(convertIncompatiblePattern("yyyy-MM-dd uuuu") === "uuuu-MM-dd eeee")
assert(convertIncompatiblePattern("yyyy-MM-dd EEEE") === "uuuu-MM-dd EEEE")
assert(convertIncompatiblePattern("yyyy-MM-dd'e'HH:mm:ss") === "uuuu-MM-dd'e'HH:mm:ss")
assert(convertIncompatiblePattern("yyyy-MM-dd'T'") === "uuuu-MM-dd'T'")
assert(convertIncompatiblePattern("yyyy-MM-dd uuuu", isParsing = false) === "uuuu-MM-dd eeee")
assert(convertIncompatiblePattern("yyyy-MM-dd EEEE", isParsing = false) === "uuuu-MM-dd EEEE")
assert(convertIncompatiblePattern("yyyy-MM-dd'e'HH:mm:ss", isParsing = false) ===
"uuuu-MM-dd'e'HH:mm:ss")
assert(convertIncompatiblePattern("yyyy-MM-dd'T'", isParsing = false) === "uuuu-MM-dd'T'")
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -199,4 +199,50 @@ class DateFormatterSuite extends SparkFunSuite with SQLHelper {
// SparkUpgradeException here.
intercept[SparkUpgradeException](formatter.parse("02-29"))
}

test("Disable week-based date fields and quarter fields for parsing") {
  // Builds a formatter for the one-letter pattern `letter` in the requested mode, always with
  // the default locale and the SIMPLE_DATE_FORMAT legacy format.
  def newFormatter(letter: Char, isParsing: Boolean) = DateFormatter(
    letter.toString,
    UTC,
    DateFormatter.defaultLocale,
    LegacyDateFormats.SIMPLE_DATE_FORMAT,
    isParsing = isParsing)

  // Creating the formatter for parsing must throw `E`; creating it for formatting must
  // succeed and yield a non-empty string when formatting `0`.
  def check[E <: AnyRef : scala.reflect.ClassTag](letter: Char): Unit = {
    intercept[E](newFormatter(letter, isParsing = true))
    assert(newFormatter(letter, isParsing = false).format(0).nonEmpty)
  }

  // Letters expected to fail with SparkUpgradeException when used for parsing.
  "YWwEuF".foreach(letter => check[SparkUpgradeException](letter))
  // Quarter letters expected to fail with IllegalArgumentException when used for parsing.
  "qQ".foreach(letter => check[IllegalArgumentException](letter))
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
2177456523456789L,
11858049903010203L).foreach { micros =>
outstandingZoneIds.foreach { zoneId =>
val timestamp = TimestampFormatter(pattern, zoneId).format(micros)
val timestamp = TimestampFormatter(pattern, zoneId, isParsing = false).format(micros)
val parsed = TimestampFormatter(
pattern, zoneId, isParsing = true).parse(timestamp)
assert(micros === parsed)
Expand All @@ -120,14 +120,14 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
val pattern = "yyyy-MM-dd'T'HH:mm:ss.SSSSSS"
val micros = TimestampFormatter(
pattern, zoneId, isParsing = true).parse(timestamp)
val formatted = TimestampFormatter(pattern, zoneId).format(micros)
val formatted = TimestampFormatter(pattern, zoneId, isParsing = false).format(micros)
assert(timestamp === formatted)
}
}
}

test("case insensitive parsing of am and pm") {
val formatter = TimestampFormatter("yyyy MMM dd hh:mm:ss a", UTC)
val formatter = TimestampFormatter("yyyy MMM dd hh:mm:ss a", UTC, isParsing = false)
val micros = formatter.parse("2009 Mar 20 11:30:01 am")
assert(micros === date(2009, 3, 20, 11, 30, 1))
}
Expand Down Expand Up @@ -157,8 +157,8 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
assert(TimestampFormatter(UTC).format(micros) === "-0099-01-01 00:00:00")
assert(TimestampFormatter(UTC).format(instant) === "-0099-01-01 00:00:00")
withDefaultTimeZone(UTC) { // toJavaTimestamp depends on the default time zone
assert(TimestampFormatter("yyyy-MM-dd HH:mm:SS G", UTC).format(toJavaTimestamp(micros))
=== "0100-01-01 00:00:00 BC")
assert(TimestampFormatter("yyyy-MM-dd HH:mm:SS G", UTC, isParsing = false)
.format(toJavaTimestamp(micros)) === "0100-01-01 00:00:00 BC")
}
}

Expand Down Expand Up @@ -209,7 +209,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
"2019-10-14T09:39:07.1", "2019-10-14T09:39:07.1")

try {
TimestampFormatter("yyyy/MM/dd HH_mm_ss.SSSSSS", zoneId, true)
TimestampFormatter("yyyy/MM/dd HH_mm_ss.SSSSSS", zoneId, isParsing = true)
.parse("2019/11/14 20#25#30.123456")
fail("Expected to throw an exception for the invalid input")
} catch {
Expand All @@ -222,7 +222,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
test("formatting timestamp strings up to microsecond precision") {
outstandingZoneIds.foreach { zoneId =>
def check(pattern: String, input: String, expected: String): Unit = {
val formatter = TimestampFormatter(pattern, zoneId)
val formatter = TimestampFormatter(pattern, zoneId, isParsing = false)
val timestamp = stringToTimestamp(UTF8String.fromString(input), zoneId).get
val actual = formatter.format(timestamp)
assert(actual === expected)
Expand Down Expand Up @@ -259,7 +259,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
}

test("SPARK-30958: parse timestamp with negative year") {
val formatter1 = TimestampFormatter("yyyy-MM-dd HH:mm:ss", UTC, true)
val formatter1 = TimestampFormatter("yyyy-MM-dd HH:mm:ss", UTC, isParsing = true)
assert(formatter1.parse("-1234-02-22 02:22:22") === date(-1234, 2, 22, 2, 22, 22))

def assertParsingError(f: => Unit): Unit = {
Expand All @@ -272,7 +272,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
}

// "yyyy" with "G" can't parse negative year or year 0000.
val formatter2 = TimestampFormatter("G yyyy-MM-dd HH:mm:ss", UTC, true)
val formatter2 = TimestampFormatter("G yyyy-MM-dd HH:mm:ss", UTC, isParsing = true)
assertParsingError(formatter2.parse("BC -1234-02-22 02:22:22"))
assertParsingError(formatter2.parse("AC 0000-02-22 02:22:22"))

Expand Down Expand Up @@ -318,7 +318,7 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
test("parsing hour with various patterns") {
def createFormatter(pattern: String): TimestampFormatter = {
// Use `SIMPLE_DATE_FORMAT`, so that the legacy parser also fails with invalid value range.
TimestampFormatter(pattern, UTC, LegacyDateFormats.SIMPLE_DATE_FORMAT, false)
TimestampFormatter(pattern, UTC, LegacyDateFormats.SIMPLE_DATE_FORMAT, isParsing = false)
}

withClue("HH") {
Expand Down Expand Up @@ -377,38 +377,68 @@ class TimestampFormatterSuite extends SparkFunSuite with SQLHelper with Matchers
}

test("missing date fields") {
val formatter = TimestampFormatter("HH:mm:ss", UTC)
val formatter = TimestampFormatter("HH:mm:ss", UTC, isParsing = true)
val micros = formatter.parse("11:30:01")
assert(micros === date(1970, 1, 1, 11, 30, 1))
}

test("missing year field with invalid date") {
// Use `SIMPLE_DATE_FORMAT`, so that the legacy parser also fails with invalid date.
val formatter = TimestampFormatter("MM-dd", UTC, LegacyDateFormats.SIMPLE_DATE_FORMAT, false)
val formatter =
TimestampFormatter("MM-dd", UTC, LegacyDateFormats.SIMPLE_DATE_FORMAT, isParsing = false)
withDefaultTimeZone(UTC)(intercept[DateTimeException](formatter.parse("02-29")))
}

test("missing am/pm field") {
val formatter = TimestampFormatter("yyyy hh:mm:ss", UTC)
val formatter = TimestampFormatter("yyyy hh:mm:ss", UTC, isParsing = true)
val micros = formatter.parse("2009 11:30:01")
assert(micros === date(2009, 1, 1, 11, 30, 1))
}

test("missing time fields") {
val formatter = TimestampFormatter("yyyy HH", UTC)
val formatter = TimestampFormatter("yyyy HH", UTC, isParsing = true)
val micros = formatter.parse("2009 11")
assert(micros === date(2009, 1, 1, 11))
}

test("explicitly forbidden datetime patterns") {
// not supported by the legacy parser either
Seq("QQQQQ", "qqqqq", "A", "c", "e", "n", "N", "p").foreach { pattern =>
intercept[IllegalArgumentException](TimestampFormatter(pattern, UTC).format(0))
intercept[IllegalArgumentException](TimestampFormatter(pattern, UTC, isParsing = false)
.format(0))
}
// supported by the legacy parser, so we fail with SparkUpgradeException to suggest it to users
Seq("GGGGG", "MMMMM", "LLLLL", "EEEEE", "uuuuu", "aa", "aaa", "y" * 11, "y" * 11)
Seq("GGGGG", "MMMMM", "LLLLL", "EEEEE", "uuuuu", "aa", "aaa", "y" * 11, "Y" * 11)
.foreach { pattern =>
intercept[SparkUpgradeException](TimestampFormatter(pattern, UTC).format(0))
intercept[SparkUpgradeException] {
TimestampFormatter(pattern, UTC, isParsing = false).format(0)
}
}
}

test("Disable week-based date fields and quarter fields for parsing") {
  // Creating a formatter for the one-letter pattern in parsing mode must throw `E`; the same
  // pattern in formatting mode must succeed and yield a non-empty string when formatting `0`.
  def check[E <: AnyRef : scala.reflect.ClassTag](letter: Char): Unit = {
    val pattern = letter.toString
    intercept[E](TimestampFormatter(pattern, UTC, isParsing = true))
    assert(TimestampFormatter(pattern, UTC, isParsing = false).format(0).nonEmpty)
  }

  // Letters expected to fail with SparkUpgradeException when used for parsing.
  "YWwEuF".foreach(letter => check[SparkUpgradeException](letter))
  // Quarter letters expected to fail with IllegalArgumentException when used for parsing.
  "qQ".foreach(letter => check[IllegalArgumentException](letter))
}
}
Loading