Skip to content
Closed
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ package org.apache.spark.sql.catalyst.util
import java.sql.{Date, Timestamp}
import java.text.{DateFormat, SimpleDateFormat}
import java.util.{Calendar, TimeZone}
import java.util.concurrent.ConcurrentHashMap
import javax.xml.bind.DatatypeConverter

import org.apache.spark.unsafe.types.UTF8String
Expand Down Expand Up @@ -55,10 +56,19 @@ object DateTimeUtils {
// this is year -17999, calculation: 50 * daysIn400Year
final val YearZero = -17999
final val toYearZero = to2001 + 7304850
final val TimeZoneGMT = TimeZone.getTimeZone("GMT")

@transient lazy val defaultTimeZone = TimeZone.getDefault

// Reuse the TimeZone object as it is expensive to create in each method call.
final val timeZones = new ConcurrentHashMap[String, TimeZone]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This map could be quite big, because the string varies. Actually ZoneInfoFile does provide a cache for different IDs. Let's find out whether the boost you mentioned comes from reusing TimeZone or Calendar instances.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This map could be quite big, because the string varies. Actually ZoneInfoFile does provide a cache for different IDs. Let's find out whether the boost you mentioned comes from reusing TimeZone or Calendar instances.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

By use this map we can skip a lot of calls to getTimeZone, which is a synchronized method, ConcurrentHashMap can help improve performance, that's true. Do we need add a transient?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added transient. The total available timezone IDs should be limited.


// Reuse the Calendar object in each thread as it is expensive to create in each method call.
private val threadLocalCalendar = new ThreadLocal[Calendar] {
override protected def initialValue: Calendar = {
Calendar.getInstance
}
}

// Java TimeZone has no mention of thread safety. Use thread local instance to be safe.
private val threadLocalLocalTimeZone = new ThreadLocal[TimeZone] {
override protected def initialValue: TimeZone = {
Expand All @@ -80,6 +90,13 @@ object DateTimeUtils {
}
}

def getTimeZone(id: String): TimeZone = {
if (!timeZones.containsKey(id)) {
timeZones.put(id, TimeZone.getTimeZone(id))
}
timeZones.get(id)
}

// we should use the exact day as Int, for example, (year, month, day) -> day
def millisToDays(millisUtc: Long): SQLDate = {
// SPARK-6785: use Math.floor so negative number of days (dates before 1970)
Expand Down Expand Up @@ -339,12 +356,14 @@ object DateTimeUtils {
return None
}

val c = if (timeZone.isEmpty) {
Calendar.getInstance()
val c = threadLocalCalendar.get()
if (timeZone.isEmpty) {
c.setTimeZone(defaultTimeZone)
} else {
Calendar.getInstance(
TimeZone.getTimeZone(f"GMT${timeZone.get.toChar}${segments(7)}%02d:${segments(8)}%02d"))
c.setTimeZone(
getTimeZone(f"GMT${timeZone.get.toChar}${segments(7)}%02d:${segments(8)}%02d"))
}
c.clear()
c.set(Calendar.MILLISECOND, 0)

if (justTime) {
Expand Down Expand Up @@ -408,7 +427,9 @@ object DateTimeUtils {
segments(2) < 1 || segments(2) > 31) {
return None
}
val c = Calendar.getInstance(TimeZoneGMT)
val c = threadLocalCalendar.get()
c.setTimeZone(getTimeZone("GMT"))
c.clear()
c.set(segments(0), segments(1) - 1, segments(2), 0, 0, 0)
c.set(Calendar.MILLISECOND, 0)
Some((c.getTimeInMillis / MILLIS_PER_DAY).toInt)
Expand Down Expand Up @@ -825,7 +846,7 @@ object DateTimeUtils {
* representation in their timezone.
*/
def fromUTCTime(time: SQLTimestamp, timeZone: String): SQLTimestamp = {
val tz = TimeZone.getTimeZone(timeZone)
val tz = getTimeZone(timeZone)
val offset = tz.getOffset(time / 1000L)
time + offset * 1000L
}
Expand All @@ -835,7 +856,7 @@ object DateTimeUtils {
* string representation in their timezone.
*/
def toUTCTime(time: SQLTimestamp, timeZone: String): SQLTimestamp = {
val tz = TimeZone.getTimeZone(timeZone)
val tz = getTimeZone(timeZone)
val offset = tz.getOffset(time / 1000L)
time - offset * 1000L
}
Expand Down