Skip to content

Commit 3cd4443

Browse files
committed
Use old style.
1 parent 4a372a3 commit 3cd4443

3 files changed

Lines changed: 38 additions & 18 deletions

File tree

sql/core/benchmarks/FilterPushdownBenchmark-results.txt

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -712,27 +712,27 @@ Intel(R) Core(TM) i7-4770HQ CPU @ 2.20GHz
712712

713713
Select 1 row with 1 filters: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
714714
------------------------------------------------------------------------------------------------
715-
Parquet Vectorized 167 / 183 0.0 166581369.0 1.0X
716-
Parquet Vectorized (Pushdown) 148 / 157 0.0 148497299.0 1.1X
717-
Native ORC Vectorized 142 / 151 0.0 142053680.0 1.2X
718-
Native ORC Vectorized (Pushdown) 142 / 148 0.0 142490700.0 1.2X
715+
Parquet Vectorized 158 / 182 0.0 158442969.0 1.0X
716+
Parquet Vectorized (Pushdown) 150 / 158 0.0 149718289.0 1.1X
717+
Native ORC Vectorized 141 / 148 0.0 141259852.0 1.1X
718+
Native ORC Vectorized (Pushdown) 142 / 147 0.0 142016472.0 1.1X
719719

720720
Java HotSpot(TM) 64-Bit Server VM 1.8.0_181-b13 on Mac OS X 10.13.6
721721
Intel(R) Core(TM) i7-4770HQ CPU @ 2.20GHz
722722

723723
Select 1 row with 250 filters: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
724724
------------------------------------------------------------------------------------------------
725-
Parquet Vectorized 1135 / 1147 0.0 1134508548.0 1.0X
726-
Parquet Vectorized (Pushdown) 1432 / 1442 0.0 1431916497.0 0.8X
727-
Native ORC Vectorized 1122 / 1128 0.0 1121722239.0 1.0X
728-
Native ORC Vectorized (Pushdown) 1175 / 1182 0.0 1175152267.0 1.0X
725+
Parquet Vectorized 1013 / 1026 0.0 1013194322.0 1.0X
726+
Parquet Vectorized (Pushdown) 1326 / 1332 0.0 1326301956.0 0.8X
727+
Native ORC Vectorized 1005 / 1010 0.0 1005266379.0 1.0X
728+
Native ORC Vectorized (Pushdown) 1068 / 1071 0.0 1067964993.0 0.9X
729729

730730
Java HotSpot(TM) 64-Bit Server VM 1.8.0_181-b13 on Mac OS X 10.13.6
731731
Intel(R) Core(TM) i7-4770HQ CPU @ 2.20GHz
732732

733733
Select 1 row with 500 filters: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
734734
------------------------------------------------------------------------------------------------
735-
Parquet Vectorized 4480 / 4517 0.0 4480162380.0 1.0X
736-
Parquet Vectorized (Pushdown) 5191 / 5222 0.0 5191121525.0 0.9X
737-
Native ORC Vectorized 4474 / 4485 0.0 4474218663.0 1.0X
738-
Native ORC Vectorized (Pushdown) 4704 / 4721 0.0 4704080940.0 1.0X
735+
Parquet Vectorized 3598 / 3614 0.0 3598001202.0 1.0X
736+
Parquet Vectorized (Pushdown) 4282 / 4333 0.0 4281849770.0 0.8X
737+
Native ORC Vectorized 3594 / 3619 0.0 3593551548.0 1.0X
738+
Native ORC Vectorized (Pushdown) 3834 / 3840 0.0 3834240570.0 0.9X

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcFilters.scala

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,9 +73,19 @@ private[sql] object OrcFilters {
7373
def createFilter(schema: StructType, filters: Seq[Filter]): Option[SearchArgument] = {
7474
val dataTypeMap = schema.map(f => f.name -> f.dataType).toMap
7575

76-
buildTree(filters.filter(buildSearchArgument(dataTypeMap, _, newBuilder).isDefined))
77-
.flatMap(buildSearchArgument(dataTypeMap, _, newBuilder))
78-
.map(_.build)
76+
// First, tries to convert each filter individually to see whether it's convertible, and then
77+
// collects all convertible ones to build the final `SearchArgument`.
78+
val convertibleFilters = for {
79+
filter <- filters
80+
_ <- buildSearchArgument(dataTypeMap, filter, newBuilder)
81+
} yield filter
82+
83+
for {
84+
// Combines all convertible filters using `And` to produce a single conjunction
85+
conjunction <- buildTree(convertibleFilters)
86+
// Then tries to build a single ORC `SearchArgument` for the conjunction predicate
87+
builder <- buildSearchArgument(dataTypeMap, conjunction, newBuilder)
88+
} yield builder.build()
7989
}
8090

8191
/**

sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,19 @@ private[orc] object OrcFilters extends Logging {
6060
def createFilter(schema: StructType, filters: Array[Filter]): Option[SearchArgument] = {
6161
val dataTypeMap = schema.map(f => f.name -> f.dataType).toMap
6262

63-
buildTree(filters.filter(buildSearchArgument(dataTypeMap, _, newBuilder).isDefined))
64-
.flatMap(buildSearchArgument(dataTypeMap, _, newBuilder))
65-
.map(_.build)
63+
// First, tries to convert each filter individually to see whether it's convertible, and then
64+
// collects all convertible ones to build the final `SearchArgument`.
65+
val convertibleFilters = for {
66+
filter <- filters
67+
_ <- buildSearchArgument(dataTypeMap, filter, newBuilder)
68+
} yield filter
69+
70+
for {
71+
// Combines all convertible filters using `And` to produce a single conjunction
72+
conjunction <- buildTree(convertibleFilters)
73+
// Then tries to build a single ORC `SearchArgument` for the conjunction predicate
74+
builder <- buildSearchArgument(dataTypeMap, conjunction, newBuilder)
75+
} yield builder.build()
6676
}
6777

6878
private def buildSearchArgument(

0 commit comments

Comments
 (0)