From c4dda25ced4216ecd8824b3de0d0b8adfd95e299 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 22 Apr 2020 16:15:16 +0300 Subject: [PATCH 1/3] Add comments for filters --- .../org/apache/spark/sql/sources/filters.scala | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala index 319073e4475be..ac7bd49e2ee1a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala @@ -27,6 +27,20 @@ import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.parseColumnPath /** * A filter predicate for data sources. * + * Filters with DATE or TIMESTAMP column attributes have values of Java types with + * date-time fields that are derived from filter date-time value at the default JVM time zone. + * - DATE filter values are instances of `java.sql.Date` that are constructed from local + * dates with fields (year, month, day). The local dates are derived from numbers of days + * since the epoch 1970-01-01 in Proleptic Gregorian calendar. + * - TIMESTAMP filter values are instances of `java.sql.Timestamp` that are constructed from + * local timestamps with the fields (year, month, day, hour, minute, second with fraction). + * The local timestamps are derived from microseconds since the epoch 1970-01-01 00:00:00Z + * representing the local timestamp at the default JVM time zone in Proleptic Gregorian + * calendar. + * Since Spark 3.0, date-time filters values are rebased via local dates/timestamps from + * Proleptic Gregorian calendar to the hybrid calendar (Julian + Gregorian since 1582-10-15) + * which Java classes `java.sql.Date` and `java.sql.Timestamp` are based on. 
+ * * @since 1.3.0 */ @Stable From 2b5594ce31f6ac74e52ab9cff1aac3c7062db937 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 22 Apr 2020 20:12:36 +0300 Subject: [PATCH 2/3] Update comments for apply() and get() of Row --- .../src/main/scala/org/apache/spark/sql/Row.scala | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala index 9a7e077b658df..4487a2d7f4358 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala @@ -165,8 +165,11 @@ trait Row extends Serializable { * StringType -> String * DecimalType -> java.math.BigDecimal * - * DateType -> java.sql.Date - * TimestampType -> java.sql.Timestamp + * DateType -> java.sql.Date if spark.sql.datetime.java8API.enabled is false + * DateType -> java.time.LocalDate if spark.sql.datetime.java8API.enabled is true + * + * TimestampType -> java.sql.Timestamp if spark.sql.datetime.java8API.enabled is false + * TimestampType -> java.time.Instant if spark.sql.datetime.java8API.enabled is true * * BinaryType -> byte array * ArrayType -> scala.collection.Seq (use getList for java.util.List) @@ -190,8 +193,11 @@ trait Row extends Serializable { * StringType -> String * DecimalType -> java.math.BigDecimal * - * DateType -> java.sql.Date - * TimestampType -> java.sql.Timestamp + * DateType -> java.sql.Date if spark.sql.datetime.java8API.enabled is false + * DateType -> java.time.LocalDate if spark.sql.datetime.java8API.enabled is true + * + * TimestampType -> java.sql.Timestamp if spark.sql.datetime.java8API.enabled is false + * TimestampType -> java.time.Instant if spark.sql.datetime.java8API.enabled is true * * BinaryType -> byte array * ArrayType -> scala.collection.Seq (use getList for java.util.List) From 4ac780ea1a08458b434b1ca10e3de36e7ddb4e42 Mon Sep 17 00:00:00 2001 From: Max Gekk Date: Wed, 22 Apr 
2020 20:28:24 +0300 Subject: [PATCH 3/3] Update comment for filter values --- .../org/apache/spark/sql/sources/filters.scala | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala index ac7bd49e2ee1a..7533793253513 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/sources/filters.scala @@ -25,21 +25,8 @@ import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.parseColumnPath //////////////////////////////////////////////////////////////////////////////////////////////////// /** - * A filter predicate for data sources. - * - * Filters with DATE or TIMESTAMP column attributes have values of Java types with - * date-time fields that are derived from filter date-time value at the default JVM time zone. - * - DATE filter values are instances of `java.sql.Date` that are constructed from local - * dates with fields (year, month, day). The local dates are derived from numbers of days - * since the epoch 1970-01-01 in Proleptic Gregorian calendar. - * - TIMESTAMP filter values are instances of `java.sql.Timestamp` that are constructed from - * local timestamps with the fields (year, month, day, hour, minute, second with fraction). - * The local timestamps are derived from microseconds since the epoch 1970-01-01 00:00:00Z - * representing the local timestamp at the default JVM time zone in Proleptic Gregorian - * calendar. - * Since Spark 3.0, date-time filters values are rebased via local dates/timestamps from - * Proleptic Gregorian calendar to the hybrid calendar (Julian + Gregorian since 1582-10-15) - * which Java classes `java.sql.Date` and `java.sql.Timestamp` are based on. + * A filter predicate for data sources. 
Mapping between Spark SQL types and filter value + * types follows the convention for the return type of [[org.apache.spark.sql.Row#get(int)]]. * * @since 1.3.0 */