Skip to content

Commit ab1d57f

Browse files
committed
Support SimpleDateFormat as a legacy date parser
1 parent 38d90d8 commit ab1d57f

8 files changed

Lines changed: 73 additions & 39 deletions

File tree

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/CSVOptions.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -146,10 +146,10 @@ class CSVOptions(
146146
// A language tag in IETF BCP 47 format
147147
val locale: Locale = parameters.get("locale").map(Locale.forLanguageTag).getOrElse(Locale.US)
148148

149-
val dateFormat: String = parameters.getOrElse("dateFormat", "uuuu-MM-dd")
149+
val dateFormat: String = parameters.getOrElse("dateFormat", DateFormatter.defaultPattern)
150150

151151
val timestampFormat: String =
152-
parameters.getOrElse("timestampFormat", "uuuu-MM-dd'T'HH:mm:ss.SSSXXX")
152+
parameters.getOrElse("timestampFormat", s"${DateFormatter.defaultPattern}'T'HH:mm:ss.SSSXXX")
153153

154154
val multiLine = parameters.get("multiLine").map(_.toBoolean).getOrElse(false)
155155

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityGenerator.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@ class UnivocityGenerator(
5050
private val dateFormatter = DateFormatter(
5151
options.dateFormat,
5252
options.zoneId,
53-
options.locale)
53+
options.locale,
54+
legacyFormat = FAST_DATE_FORMAT)
5455

5556
private def makeConverter(dataType: DataType): ValueConverter = dataType match {
5657
case DateType =>

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/csv/UnivocityParser.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,8 @@ class UnivocityParser(
9292
private val dateFormatter = DateFormatter(
9393
options.dateFormat,
9494
options.zoneId,
95-
options.locale)
95+
options.locale,
96+
legacyFormat = FAST_DATE_FORMAT)
9697

9798
private val csvFilters = new CSVFilters(filters, requiredSchema)
9899

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JSONOptions.scala

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,10 @@ private[sql] class JSONOptions(
8888
val zoneId: ZoneId = DateTimeUtils.getZoneId(
8989
parameters.getOrElse(DateTimeUtils.TIMEZONE_OPTION, defaultTimeZoneId))
9090

91-
val dateFormat: String = parameters.getOrElse("dateFormat", "uuuu-MM-dd")
91+
val dateFormat: String = parameters.getOrElse("dateFormat", DateFormatter.defaultPattern)
9292

9393
val timestampFormat: String =
94-
parameters.getOrElse("timestampFormat", "uuuu-MM-dd'T'HH:mm:ss.SSSXXX")
94+
parameters.getOrElse("timestampFormat", s"${DateFormatter.defaultPattern}'T'HH:mm:ss.SSSXXX")
9595

9696
val multiLine = parameters.get("multiLine").map(_.toBoolean).getOrElse(false)
9797

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonGenerator.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,8 @@ private[sql] class JacksonGenerator(
8686
private val dateFormatter = DateFormatter(
8787
options.dateFormat,
8888
options.zoneId,
89-
options.locale)
89+
options.locale,
90+
legacyFormat = FAST_DATE_FORMAT)
9091

9192
private def makeWriter(dataType: DataType): ValueWriter = dataType match {
9293
case NullType =>

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,8 @@ class JacksonParser(
6464
private val dateFormatter = DateFormatter(
6565
options.dateFormat,
6666
options.zoneId,
67-
options.locale)
67+
options.locale,
68+
legacyFormat = FAST_DATE_FORMAT)
6869

6970
/**
7071
* Create a converter which converts the JSON documents held by the `JsonParser`

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateFormatter.scala

Lines changed: 51 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@
1717

1818
package org.apache.spark.sql.catalyst.util
1919

20+
import java.text.SimpleDateFormat
2021
import java.time.{LocalDate, ZoneId}
21-
import java.util.Locale
22+
import java.util.{Date, Locale}
2223

2324
import org.apache.commons.lang3.time.FastDateFormat
2425

@@ -51,41 +52,76 @@ class Iso8601DateFormatter(
5152
}
5253
}
5354

54-
class LegacyDateFormatter(pattern: String, locale: Locale) extends DateFormatter {
55-
@transient
56-
private lazy val format = FastDateFormat.getInstance(pattern, locale)
55+
trait LegacyDateFormatter extends DateFormatter {
56+
def parseToDate(s: String): Date
57+
def formatDate(d: Date): String
5758

5859
override def parse(s: String): Int = {
59-
val milliseconds = format.parse(s).getTime
60+
val milliseconds = parseToDate(s).getTime
6061
DateTimeUtils.millisToDays(milliseconds)
6162
}
6263

6364
override def format(days: Int): String = {
6465
val date = DateTimeUtils.toJavaDate(days)
65-
format.format(date)
66+
formatDate(date)
6667
}
6768
}
6869

70+
/**
 * Legacy date formatter that delegates parsing and formatting to a `FastDateFormat`
 * obtained for the given pattern and locale.
 *
 * @param pattern date pattern handed to `FastDateFormat.getInstance`
 * @param locale locale handed to `FastDateFormat.getInstance`
 */
class LegacyFastDateFormatter(pattern: String, locale: Locale) extends LegacyDateFormatter {
  // Built lazily and marked transient so the underlying format is re-created on first use
  // rather than being carried along when this formatter is serialized.
  @transient
  private lazy val fdf = FastDateFormat.getInstance(pattern, locale)

  /** Parses `s` with the underlying `FastDateFormat`. */
  override def parseToDate(s: String): Date = fdf.parse(s)

  /** Formats `d` with the underlying `FastDateFormat`. */
  override def formatDate(d: Date): String = fdf.format(d)
}
76+
77+
/**
 * Legacy date formatter that delegates parsing and formatting to a
 * `java.text.SimpleDateFormat` created for the given pattern and locale.
 *
 * NOTE(review): `SimpleDateFormat` is documented by the JDK as not synchronized; confirm that
 * a single instance of this formatter is never used from several threads at once.
 * NOTE(review): no leniency flag is set here, so the JDK default applies — confirm that is the
 * intended behaviour for both the strict and the lenient legacy format.
 *
 * @param pattern date pattern passed to the `SimpleDateFormat` constructor
 * @param locale locale passed to the `SimpleDateFormat` constructor
 */
class LegacySimpleDateFormatter(pattern: String, locale: Locale) extends LegacyDateFormatter {
  // Built lazily and marked transient so the underlying format is re-created on first use
  // rather than being carried along when this formatter is serialized.
  @transient
  private lazy val sdf = new SimpleDateFormat(pattern, locale)

  /** Parses `s` with the underlying `SimpleDateFormat`. */
  override def parseToDate(s: String): Date = sdf.parse(s)

  /** Formats `d` with the underlying `SimpleDateFormat`. */
  override def formatDate(d: Date): String = sdf.format(d)
}
}
83+
6984
object DateFormatter {
85+
import LegacyDateFormats._
86+
7087
val defaultLocale: Locale = Locale.US
7188

72-
def apply(format: String, zoneId: ZoneId, locale: Locale): DateFormatter = {
89+
/**
 * Default date pattern for the currently active SQL configuration.
 *
 * @return "yyyy-MM-dd" when the legacy time parser is enabled, "uuuu-MM-dd" otherwise
 */
def defaultPattern(): String = {
  val legacyParserEnabled = SQLConf.get.legacyTimeParserEnabled
  if (legacyParserEnabled) "yyyy-MM-dd" else "uuuu-MM-dd"
}
92+
93+
private def getFormatter(
94+
format: Option[String],
95+
zoneId: ZoneId,
96+
locale: Locale = defaultLocale,
97+
legacyFormat: LegacyDateFormat = LENIENT_SIMPLE_DATE_FORMAT): DateFormatter = {
98+
99+
val pattern = format.getOrElse(defaultPattern)
73100
if (SQLConf.get.legacyTimeParserEnabled) {
74-
new LegacyDateFormatter(format, locale)
101+
legacyFormat match {
102+
case FAST_DATE_FORMAT =>
103+
new LegacyFastDateFormatter(pattern, locale)
104+
case SIMPLE_DATE_FORMAT | LENIENT_SIMPLE_DATE_FORMAT =>
105+
new LegacySimpleDateFormatter(pattern, locale)
106+
}
75107
} else {
76-
new Iso8601DateFormatter(format, zoneId, locale)
108+
new Iso8601DateFormatter(pattern, zoneId, locale)
77109
}
78110
}
79111

112+
/**
 * Creates a date formatter for an explicit pattern, time zone, locale and legacy format.
 *
 * @param format pattern used for parsing and formatting dates
 * @param zoneId time zone forwarded to `getFormatter`
 * @param locale locale the resulting formatter is built with
 * @param legacyFormat legacy implementation to select when the legacy time parser is enabled
 * @return the formatter chosen by `getFormatter`
 */
def apply(
    format: String,
    zoneId: ZoneId,
    locale: Locale,
    legacyFormat: LegacyDateFormat): DateFormatter = {
  // Forward the caller's locale. Passing `defaultLocale` here would silently discard the
  // `locale` argument supplied by callers (e.g. `options.locale` in the CSV/JSON code paths).
  getFormatter(Some(format), zoneId, locale, legacyFormat)
}
119+
80120
def apply(format: String, zoneId: ZoneId): DateFormatter = {
81-
apply(format, zoneId, defaultLocale)
121+
getFormatter(Some(format), zoneId)
82122
}
83123

84124
def apply(zoneId: ZoneId): DateFormatter = {
85-
if (SQLConf.get.legacyTimeParserEnabled) {
86-
new LegacyDateFormatter("yyyy-MM-dd", defaultLocale)
87-
} else {
88-
new Iso8601DateFormatter("uuuu-MM-dd", zoneId, defaultLocale)
89-
}
125+
getFormatter(None, zoneId)
90126
}
91127
}

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/TimestampFormatter.scala

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ trait LegacyTimestampFormatter extends TimestampFormatter {
105105
}
106106
}
107107

108-
class LegacyFastDateFormatter(
108+
class LegacyFastTimestampFormatter(
109109
pattern: String,
110110
zoneId: ZoneId,
111111
locale: Locale) extends LegacyTimestampFormatter {
@@ -115,7 +115,7 @@ class LegacyFastDateFormatter(
115115
override def formatTimestamp(t: Timestamp): String = fdf.format(t)
116116
}
117117

118-
class LegacySimpleDateFormatter(
118+
class LegacySimpleTimestampFormatter(
119119
pattern: String,
120120
zoneId: ZoneId,
121121
locale: Locale,
@@ -140,29 +140,23 @@ object TimestampFormatter {
140140

141141
val defaultLocale: Locale = Locale.US
142142

143-
def defaultPattern(): String = {
144-
if (SQLConf.get.legacyTimeParserEnabled) {
145-
"yyyy-MM-dd HH:mm:ss"
146-
} else {
147-
"uuuu-MM-dd HH:mm:ss"
148-
}
149-
}
143+
def defaultPattern(): String = s"${DateFormatter.defaultPattern()} HH:mm:ss"
150144

151145
private def getFormatter(
152146
format: Option[String],
153147
zoneId: ZoneId,
154-
locale: Locale,
155-
legacyFormat: LegacyDateFormat): TimestampFormatter = {
148+
locale: Locale = defaultLocale,
149+
legacyFormat: LegacyDateFormat = LENIENT_SIMPLE_DATE_FORMAT): TimestampFormatter = {
156150

157151
val pattern = format.getOrElse(defaultPattern)
158152
if (SQLConf.get.legacyTimeParserEnabled) {
159153
legacyFormat match {
160154
case FAST_DATE_FORMAT =>
161-
new LegacyFastDateFormatter(pattern, zoneId, locale)
155+
new LegacyFastTimestampFormatter(pattern, zoneId, locale)
162156
case SIMPLE_DATE_FORMAT =>
163-
new LegacySimpleDateFormatter(pattern, zoneId, locale, lenient = false)
157+
new LegacySimpleTimestampFormatter(pattern, zoneId, locale, lenient = false)
164158
case LENIENT_SIMPLE_DATE_FORMAT =>
165-
new LegacySimpleDateFormatter(pattern, zoneId, locale, lenient = true)
159+
new LegacySimpleTimestampFormatter(pattern, zoneId, locale, lenient = true)
166160
}
167161
} else {
168162
new Iso8601TimestampFormatter(pattern, zoneId, locale)
@@ -178,11 +172,11 @@ object TimestampFormatter {
178172
}
179173

180174
def apply(format: String, zoneId: ZoneId): TimestampFormatter = {
181-
apply(format, zoneId, defaultLocale, LENIENT_SIMPLE_DATE_FORMAT)
175+
getFormatter(Some(format), zoneId)
182176
}
183177

184178
def apply(zoneId: ZoneId): TimestampFormatter = {
185-
getFormatter(None, zoneId, defaultLocale, LENIENT_SIMPLE_DATE_FORMAT)
179+
getFormatter(None, zoneId)
186180
}
187181

188182
def getFractionFormatter(zoneId: ZoneId): TimestampFormatter = {

0 commit comments

Comments
 (0)