-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-31489][SPARK-31488][SQL] Translate date values of pushed down filters to java.sql.Date
#28272
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-31489][SPARK-31488][SQL] Translate date values of pushed down filters to java.sql.Date
#28272
Changes from 1 commit
3dca84d
9ce9a34
32fb0ea
d52fe37
ac0b27a
ff2ca3f
2a82a10
d7b2ece
2973dd7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.datasources.parquet | |
| import java.math.{BigDecimal => JBigDecimal} | ||
| import java.nio.charset.StandardCharsets | ||
| import java.sql.{Date, Timestamp} | ||
| import java.time.LocalDate | ||
|
|
||
| import org.apache.parquet.filter2.predicate.{FilterApi, FilterPredicate, Operators} | ||
| import org.apache.parquet.filter2.predicate.FilterApi._ | ||
|
|
@@ -1561,6 +1562,63 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared | |
| } | ||
| } | ||
| } | ||
|
|
||
// Verifies that comparisons between a DateType column and java.time.LocalDate values are
// turned into the expected Parquet filter predicate classes, and that each filter returns
// the expected rows. Runs with SQLConf.DATETIME_JAVA8API_ENABLED set to "true".
test("filter pushdown - local date") {
  // The four consecutive days stored in the test table.
  val day18 = LocalDate.parse("2018-03-18")
  val day19 = LocalDate.parse("2018-03-19")
  val day20 = LocalDate.parse("2018-03-20")
  val day21 = LocalDate.parse("2018-03-21")
  val data = Seq(day18, day19, day20, day21)

  import testImplicits._
  withSQLConf(SQLConf.DATETIME_JAVA8API_ENABLED.key -> "true") {
    val input = data.map(Tuple1(_)).toDF()
    // NOTE(review): presumably withNestedDataFrame re-runs the body with the column placed
    // at different nesting levels, with resultFun wrapping a value accordingly - confirm.
    withNestedDataFrame(input) { case (inputDF, colName, resultFun) =>
      withParquetDataFrame(inputDF) { implicit df =>
        val dateAttr: Expression = df(colName).expr
        assert(dateAttr.dataType === DateType)

        // Null checks.
        checkFilterPredicate(dateAttr.isNull, classOf[Eq[_]], Seq.empty[Row])
        checkFilterPredicate(
          dateAttr.isNotNull,
          classOf[NotEq[_]],
          data.map(d => Row(resultFun(d))))

        // Equality and inequality, attribute on the left.
        checkFilterPredicate(dateAttr === day18, classOf[Eq[_]], resultFun(day18))
        checkFilterPredicate(dateAttr <=> day18, classOf[Eq[_]], resultFun(day18))
        checkFilterPredicate(
          dateAttr =!= day18,
          classOf[NotEq[_]],
          Seq(day19, day20, day21).map(d => Row(resultFun(d))))

        // Range comparisons, attribute on the left.
        checkFilterPredicate(dateAttr < day19, classOf[Lt[_]], resultFun(day18))
        checkFilterPredicate(dateAttr > day20, classOf[Gt[_]], resultFun(day21))
        checkFilterPredicate(dateAttr <= day18, classOf[LtEq[_]], resultFun(day18))
        checkFilterPredicate(dateAttr >= day21, classOf[GtEq[_]], resultFun(day21))

        // Same comparisons with the literal on the left; the expected predicate class is
        // the mirrored one (e.g. `literal > attr` is expected as Lt).
        checkFilterPredicate(Literal(day18) === dateAttr, classOf[Eq[_]], resultFun(day18))
        checkFilterPredicate(Literal(day18) <=> dateAttr, classOf[Eq[_]], resultFun(day18))
        checkFilterPredicate(Literal(day19) > dateAttr, classOf[Lt[_]], resultFun(day18))
        checkFilterPredicate(Literal(day20) < dateAttr, classOf[Gt[_]], resultFun(day21))
        checkFilterPredicate(Literal(day18) >= dateAttr, classOf[LtEq[_]], resultFun(day18))
        checkFilterPredicate(Literal(day21) <= dateAttr, classOf[GtEq[_]], resultFun(day21))

        // Negation and disjunction.
        checkFilterPredicate(!(dateAttr < day21), classOf[GtEq[_]], resultFun(day21))
        checkFilterPredicate(
          dateAttr < day19 || dateAttr > day20,
          classOf[Operators.Or],
          Seq(Row(resultFun(day18)), Row(resultFun(day21))))
      }
    }
  }
}
| } | ||
|
|
||
| class ParquetV1FilterSuite extends ParquetFilterSuite { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.datasources.orc | |
| import java.math.MathContext | ||
| import java.nio.charset.StandardCharsets | ||
| import java.sql.{Date, Timestamp} | ||
| import java.time.LocalDate | ||
|
|
||
| import scala.collection.JavaConverters._ | ||
|
|
||
|
|
@@ -450,5 +451,31 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession { | |
| ).get.toString | ||
| } | ||
| } | ||
|
|
||
// Verifies which ORC PredicateLeaf operator is produced for comparisons between a date
// column and java.time.LocalDate values, with SQLConf.DATETIME_JAVA8API_ENABLED set to "true".
test("filter pushdown - local date") {
  val days = Seq("2017-08-18", "2017-08-19", "2017-08-20", "2017-08-21")
    .map(LocalDate.parse(_))
  // Named handles for the four consecutive days, in ascending order.
  val Seq(day18, day19, day20, day21) = days

  withSQLConf(SQLConf.DATETIME_JAVA8API_ENABLED.key -> "true") {
    withOrcDataFrame(days.map(Tuple1(_))) { implicit df =>
      checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)

      checkFilterPredicate($"_1" === day18, PredicateLeaf.Operator.EQUALS)
      checkFilterPredicate($"_1" <=> day18, PredicateLeaf.Operator.NULL_SAFE_EQUALS)

      // NOTE(review): `>` expecting LESS_THAN_EQUALS and `>=` expecting LESS_THAN is
      // intentional. ORC's PredicateLeaf.Operator has no greater-than variants, so these
      // are presumably pushed down as negated leaves and the helper checks only the leaf
      // operator - confirm against checkFilterPredicate.
      checkFilterPredicate($"_1" < day19, PredicateLeaf.Operator.LESS_THAN)
      checkFilterPredicate($"_1" > day20, PredicateLeaf.Operator.LESS_THAN_EQUALS)
      checkFilterPredicate($"_1" <= day18, PredicateLeaf.Operator.LESS_THAN_EQUALS)
      checkFilterPredicate($"_1" >= day21, PredicateLeaf.Operator.LESS_THAN)

      // Same comparisons with the literal on the left-hand side.
      checkFilterPredicate(Literal(day18) === $"_1", PredicateLeaf.Operator.EQUALS)
      checkFilterPredicate(Literal(day18) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
      checkFilterPredicate(Literal(day19) > $"_1", PredicateLeaf.Operator.LESS_THAN)
      checkFilterPredicate(Literal(day20) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
      checkFilterPredicate(Literal(day18) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
      checkFilterPredicate(Literal(day21) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
    }
  }
}
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -20,6 +20,7 @@ package org.apache.spark.sql.execution.datasources.orc | |
| import java.math.MathContext | ||
| import java.nio.charset.StandardCharsets | ||
| import java.sql.{Date, Timestamp} | ||
| import java.time.LocalDate | ||
|
|
||
| import scala.collection.JavaConverters._ | ||
|
|
||
|
|
@@ -451,5 +452,31 @@ class OrcFilterSuite extends OrcTest with SharedSparkSession { | |
| ).get.toString | ||
| } | ||
| } | ||
|
|
||
// Verifies which ORC PredicateLeaf operator is produced for comparisons between a date
// column and java.time.LocalDate values, with SQLConf.DATETIME_JAVA8API_ENABLED set to "true".
test("filter pushdown - local date") {
  val days = Seq("2017-08-18", "2017-08-19", "2017-08-20", "2017-08-21")
    .map(LocalDate.parse(_))
  // Named handles for the four consecutive days, in ascending order.
  val Seq(day18, day19, day20, day21) = days

  withSQLConf(SQLConf.DATETIME_JAVA8API_ENABLED.key -> "true") {
    withOrcDataFrame(days.map(Tuple1(_))) { implicit df =>
      checkFilterPredicate($"_1".isNull, PredicateLeaf.Operator.IS_NULL)

      checkFilterPredicate($"_1" === day18, PredicateLeaf.Operator.EQUALS)
      checkFilterPredicate($"_1" <=> day18, PredicateLeaf.Operator.NULL_SAFE_EQUALS)

      // NOTE(review): `>` expecting LESS_THAN_EQUALS and `>=` expecting LESS_THAN is
      // intentional. ORC's PredicateLeaf.Operator has no greater-than variants, so these
      // are presumably pushed down as negated leaves and the helper checks only the leaf
      // operator - confirm against checkFilterPredicate.
      checkFilterPredicate($"_1" < day19, PredicateLeaf.Operator.LESS_THAN)
      checkFilterPredicate($"_1" > day20, PredicateLeaf.Operator.LESS_THAN_EQUALS)
      checkFilterPredicate($"_1" <= day18, PredicateLeaf.Operator.LESS_THAN_EQUALS)
      checkFilterPredicate($"_1" >= day21, PredicateLeaf.Operator.LESS_THAN)

      // Same comparisons with the literal on the left-hand side.
      checkFilterPredicate(Literal(day18) === $"_1", PredicateLeaf.Operator.EQUALS)
      checkFilterPredicate(Literal(day18) <=> $"_1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
      checkFilterPredicate(Literal(day19) > $"_1", PredicateLeaf.Operator.LESS_THAN)
      checkFilterPredicate(Literal(day20) < $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
      checkFilterPredicate(Literal(day18) >= $"_1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
      checkFilterPredicate(Literal(day21) <= $"_1", PredicateLeaf.Operator.LESS_THAN)
    }
  }
}
| } | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.