Skip to content

Commit 406e5ec

Browse files
committed
Test filter creation with string expressions.
1 parent 5f49052 commit 406e5ec

1 file changed

Lines changed: 150 additions & 101 deletions

File tree

sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcFilterSuite.scala

Lines changed: 150 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,6 @@
1717

1818
package org.apache.spark.sql.hive.orc
1919

20-
import scala.collection.JavaConverters._
21-
22-
import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf
23-
2420
import org.apache.spark.sql.{Column, DataFrame, QueryTest}
2521
import org.apache.spark.sql.catalyst.dsl.expressions._
2622
import org.apache.spark.sql.catalyst.expressions._
@@ -31,10 +27,42 @@ import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, LogicalRe
3127
* A test suite that tests ORC filter API based filter pushdown optimization.
3228
*/
3329
class OrcFilterSuite extends QueryTest with OrcTest {
30+
// Because `ExpressionTree` is not accessible in Hive 1.2.x, filter creation must be checked
31+
// in string form in order to cover logical expressions
32+
// such as `and`, `or`, and `not`. So, this test uses `SearchArgument.toString()`
33+
// to produce a string expression and then compares it to the templates given below.
34+
// This might have to be changed after the Hive version is upgraded.
35+
val isNullTmpl = """leaf-0 = (IS_NULL _1)
36+
|expr = leaf-0""".stripMargin.trim
37+
val isNotNullTmpl = """leaf-0 = (IS_NULL _1)
38+
|expr = (not leaf-0)""".stripMargin.trim
39+
val equalsTmpl = """leaf-0 = (EQUALS _1 %s)
40+
|expr = leaf-0""".stripMargin.trim
41+
val notEqualsTmpl = """leaf-0 = (EQUALS _1 %s)
42+
|expr = (not leaf-0)""".stripMargin.trim
43+
val nullSafeEqualsTmpl = """leaf-0 = (NULL_SAFE_EQUALS _1 %s)
44+
|expr = leaf-0""".stripMargin.trim
45+
val lessThenTmpl = """leaf-0 = (LESS_THAN _1 %s)
46+
|expr = leaf-0""".stripMargin.trim
47+
val greaterThenTmpl = """leaf-0 = (LESS_THAN_EQUALS _1 %s)
48+
|expr = (not leaf-0)""".stripMargin.trim
49+
val lessThenEqualsTmpl = """leaf-0 = (LESS_THAN_EQUALS _1 %s)
50+
|expr = leaf-0""".stripMargin.trim
51+
val greaterThenEqualsTmpl = """leaf-0 = (LESS_THAN _1 %s)
52+
|expr = (not leaf-0)""".stripMargin.trim
53+
val notLessThen = """leaf-0 = (LESS_THAN _1 %s)
54+
|expr = (not leaf-0)""".stripMargin.trim
55+
val andLessThenGreaterThenTmpl = """leaf-0 = (LESS_THAN _1 %s)
56+
|leaf-1 = (LESS_THAN_EQUALS _1 %s)
57+
|expr = (or leaf-0 (not leaf-1))""".stripMargin.trim
58+
val orLessThenGreaterThenTmpl = """leaf-0 = (LESS_THAN _1 %s)
59+
|leaf-1 = (LESS_THAN_EQUALS _1 %s)
60+
|expr = (and leaf-0 (not leaf-1))""".stripMargin.trim
61+
3462
private def checkFilterPredicate(
3563
df: DataFrame,
3664
predicate: Predicate,
37-
filterOperator: PredicateLeaf.Operator): Unit = {
65+
stringExpr: String): Unit = {
3866
val output = predicate.collect { case a: Attribute => a }.distinct
3967
val query = df
4068
.select(output.map(e => Column(e)): _*)
@@ -55,135 +83,155 @@ class OrcFilterSuite extends QueryTest with OrcTest {
5583
val maybeFilter = OrcFilters.createFilter(selectedFilters.toArray)
5684
assert(maybeFilter.isDefined, s"Couldn't generate filter predicate for $selectedFilters")
5785
maybeFilter.foreach { f =>
58-
val operator = f.getLeaves.asScala.head.getOperator
59-
assert(operator === filterOperator)
86+
assert(f.toString == stringExpr)
6087
}
6188
}
6289

6390
private def checkFilterPredicate
64-
(predicate: Predicate, filterOperator: PredicateLeaf.Operator)
91+
(predicate: Predicate, stringExpr: String)
6592
(implicit df: DataFrame): Unit = {
66-
checkFilterPredicate(df, predicate, filterOperator)
93+
checkFilterPredicate(df, predicate, stringExpr)
6794
}
6895

6996
test("filter pushdown - boolean") {
7097
withOrcDataFrame((true :: false :: Nil).map(b => Tuple1.apply(Option(b)))) { implicit df =>
71-
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
98+
checkFilterPredicate('_1.isNull, isNullTmpl)
99+
checkFilterPredicate('_1.isNotNull, isNotNullTmpl)
72100
}
73101
}
74102

75103
test("filter pushdown - integer") {
76104
withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i)))) { implicit df =>
77-
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
78-
79-
checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS)
80-
checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
81-
82-
checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN)
83-
checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
84-
checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
85-
checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN)
86-
87-
checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS)
88-
checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
89-
checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN)
90-
checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
91-
checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
92-
checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN)
93-
94-
checkFilterPredicate(!('_1 < 4), PredicateLeaf.Operator.LESS_THAN)
105+
checkFilterPredicate('_1.isNull, isNullTmpl)
106+
checkFilterPredicate('_1.isNotNull, isNotNullTmpl)
107+
108+
checkFilterPredicate('_1 === 1, equalsTmpl.format(1))
109+
checkFilterPredicate('_1 <=> 1, nullSafeEqualsTmpl.format(1))
110+
checkFilterPredicate('_1 !== 1, notEqualsTmpl.format(1))
111+
112+
checkFilterPredicate('_1 < 2, lessThenTmpl.format(2))
113+
checkFilterPredicate('_1 > 3, greaterThenTmpl.format(3))
114+
checkFilterPredicate('_1 <= 1, lessThenEqualsTmpl.format(1))
115+
checkFilterPredicate('_1 >= 4, greaterThenEqualsTmpl.format(4))
116+
117+
checkFilterPredicate(Literal(1) === '_1, equalsTmpl.format(1))
118+
checkFilterPredicate(Literal(1) <=> '_1, nullSafeEqualsTmpl.format(1))
119+
checkFilterPredicate(Literal(2) > '_1, lessThenTmpl.format(2))
120+
checkFilterPredicate(Literal(3) < '_1, greaterThenTmpl.format(3))
121+
checkFilterPredicate(Literal(1) >= '_1, lessThenEqualsTmpl.format(1))
122+
checkFilterPredicate(Literal(4) <= '_1, greaterThenEqualsTmpl.format(4))
123+
124+
checkFilterPredicate(!('_1 < 4), notLessThen.format(4))
125+
checkFilterPredicate('_1 < 2 || '_1 > 3, andLessThenGreaterThenTmpl.format(2, 3))
126+
checkFilterPredicate('_1 < 2 && '_1 > 3, orLessThenGreaterThenTmpl.format(2, 3))
95127
}
96128
}
97129

98130
test("filter pushdown - long") {
99131
withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toLong)))) { implicit df =>
100-
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
101-
102-
checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS)
103-
checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
104-
105-
checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN)
106-
checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
107-
checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
108-
checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN)
109-
110-
checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS)
111-
checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
112-
checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN)
113-
checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
114-
checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
115-
checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN)
116-
117-
checkFilterPredicate(!('_1 < 4), PredicateLeaf.Operator.LESS_THAN)
132+
checkFilterPredicate('_1.isNull, isNullTmpl)
133+
checkFilterPredicate('_1.isNotNull, isNotNullTmpl)
134+
135+
checkFilterPredicate('_1 === 1, equalsTmpl.format(1))
136+
checkFilterPredicate('_1 <=> 1, nullSafeEqualsTmpl.format(1))
137+
checkFilterPredicate('_1 !== 1, notEqualsTmpl.format(1))
138+
139+
checkFilterPredicate('_1 < 2, lessThenTmpl.format(2))
140+
checkFilterPredicate('_1 > 3, greaterThenTmpl.format(3))
141+
checkFilterPredicate('_1 <= 1, lessThenEqualsTmpl.format(1))
142+
checkFilterPredicate('_1 >= 4, greaterThenEqualsTmpl.format(4))
143+
144+
checkFilterPredicate(Literal(1) === '_1, equalsTmpl.format(1))
145+
checkFilterPredicate(Literal(1) <=> '_1, nullSafeEqualsTmpl.format(1))
146+
checkFilterPredicate(Literal(2) > '_1, lessThenTmpl.format(2))
147+
checkFilterPredicate(Literal(3) < '_1, greaterThenTmpl.format(3))
148+
checkFilterPredicate(Literal(1) >= '_1, lessThenEqualsTmpl.format(1))
149+
checkFilterPredicate(Literal(4) <= '_1, greaterThenEqualsTmpl.format(4))
150+
151+
checkFilterPredicate(!('_1 < 4), notLessThen.format(4))
152+
checkFilterPredicate('_1 < 2 || '_1 > 3, andLessThenGreaterThenTmpl.format(2, 3))
153+
checkFilterPredicate('_1 < 2 && '_1 > 3, orLessThenGreaterThenTmpl.format(2, 3))
118154
}
119155
}
120156

121157
test("filter pushdown - float") {
122158
withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toFloat)))) { implicit df =>
123-
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
124-
125-
checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS)
126-
checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
127-
128-
checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN)
129-
checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
130-
checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
131-
checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN)
132-
133-
checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS)
134-
checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
135-
checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN)
136-
checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
137-
checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
138-
checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN)
139-
140-
checkFilterPredicate(!('_1 < 4), PredicateLeaf.Operator.LESS_THAN)
159+
checkFilterPredicate('_1.isNull, isNullTmpl)
160+
checkFilterPredicate('_1.isNotNull, isNotNullTmpl)
161+
162+
checkFilterPredicate('_1 === 1, equalsTmpl.format(1.0))
163+
checkFilterPredicate('_1 <=> 1, nullSafeEqualsTmpl.format(1.0))
164+
checkFilterPredicate('_1 !== 1, notEqualsTmpl.format(1.0))
165+
166+
checkFilterPredicate('_1 < 2, lessThenTmpl.format(2.0))
167+
checkFilterPredicate('_1 > 3, greaterThenTmpl.format(3.0))
168+
checkFilterPredicate('_1 <= 1, lessThenEqualsTmpl.format(1.0))
169+
checkFilterPredicate('_1 >= 4, greaterThenEqualsTmpl.format(4.0))
170+
171+
checkFilterPredicate(Literal(1) === '_1, equalsTmpl.format(1.0))
172+
checkFilterPredicate(Literal(1) <=> '_1, nullSafeEqualsTmpl.format(1.0))
173+
checkFilterPredicate(Literal(2) > '_1, lessThenTmpl.format(2.0))
174+
checkFilterPredicate(Literal(3) < '_1, greaterThenTmpl.format(3.0))
175+
checkFilterPredicate(Literal(1) >= '_1, lessThenEqualsTmpl.format(1.0))
176+
checkFilterPredicate(Literal(4) <= '_1, greaterThenEqualsTmpl.format(4.0))
177+
178+
checkFilterPredicate(!('_1 < 4), notLessThen.format(4.0))
179+
checkFilterPredicate('_1 < 2 || '_1 > 3, andLessThenGreaterThenTmpl.format(2.0, 3.0))
180+
checkFilterPredicate('_1 < 2 && '_1 > 3, orLessThenGreaterThenTmpl.format(2.0, 3.0))
141181
}
142182
}
143183

144184
test("filter pushdown - double") {
145185
withOrcDataFrame((1 to 4).map(i => Tuple1(Option(i.toDouble)))) { implicit df =>
146-
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
147-
148-
checkFilterPredicate('_1 === 1, PredicateLeaf.Operator.EQUALS)
149-
checkFilterPredicate('_1 <=> 1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
150-
151-
checkFilterPredicate('_1 < 2, PredicateLeaf.Operator.LESS_THAN)
152-
checkFilterPredicate('_1 > 3, PredicateLeaf.Operator.LESS_THAN_EQUALS)
153-
checkFilterPredicate('_1 <= 1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
154-
checkFilterPredicate('_1 >= 4, PredicateLeaf.Operator.LESS_THAN)
155-
156-
checkFilterPredicate(Literal(1) === '_1, PredicateLeaf.Operator.EQUALS)
157-
checkFilterPredicate(Literal(1) <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
158-
checkFilterPredicate(Literal(2) > '_1, PredicateLeaf.Operator.LESS_THAN)
159-
checkFilterPredicate(Literal(3) < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
160-
checkFilterPredicate(Literal(1) >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
161-
checkFilterPredicate(Literal(4) <= '_1, PredicateLeaf.Operator.LESS_THAN)
162-
163-
checkFilterPredicate(!('_1 < 4), PredicateLeaf.Operator.LESS_THAN)
186+
checkFilterPredicate('_1.isNull, isNullTmpl)
187+
checkFilterPredicate('_1.isNotNull, isNotNullTmpl)
188+
189+
checkFilterPredicate('_1 === 1, equalsTmpl.format(1.0))
190+
checkFilterPredicate('_1 <=> 1, nullSafeEqualsTmpl.format(1.0))
191+
checkFilterPredicate('_1 !== 1, notEqualsTmpl.format(1.0))
192+
193+
checkFilterPredicate('_1 < 2, lessThenTmpl.format(2.0))
194+
checkFilterPredicate('_1 > 3, greaterThenTmpl.format(3.0))
195+
checkFilterPredicate('_1 <= 1, lessThenEqualsTmpl.format(1.0))
196+
checkFilterPredicate('_1 >= 4, greaterThenEqualsTmpl.format(4.0))
197+
198+
checkFilterPredicate(Literal(1) === '_1, equalsTmpl.format(1.0))
199+
checkFilterPredicate(Literal(1) <=> '_1, nullSafeEqualsTmpl.format(1.0))
200+
checkFilterPredicate(Literal(2) > '_1, lessThenTmpl.format(2.0))
201+
checkFilterPredicate(Literal(3) < '_1, greaterThenTmpl.format(3.0))
202+
checkFilterPredicate(Literal(1) >= '_1, lessThenEqualsTmpl.format(1.0))
203+
checkFilterPredicate(Literal(4) <= '_1, greaterThenEqualsTmpl.format(4.0))
204+
205+
checkFilterPredicate(!('_1 < 4), notLessThen.format(4.0))
206+
checkFilterPredicate('_1 < 2 || '_1 > 3, andLessThenGreaterThenTmpl.format(2.0, 3.0))
207+
checkFilterPredicate('_1 < 2 && '_1 > 3, orLessThenGreaterThenTmpl.format(2.0, 3.0))
164208
}
165209
}
166210

167211
test("filter pushdown - string") {
168212
withOrcDataFrame((1 to 4).map(i => Tuple1(i.toString))) { implicit df =>
169-
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
170-
171-
checkFilterPredicate('_1 === "1", PredicateLeaf.Operator.EQUALS)
172-
checkFilterPredicate('_1 <=> "1", PredicateLeaf.Operator.NULL_SAFE_EQUALS)
173-
174-
checkFilterPredicate('_1 < "2", PredicateLeaf.Operator.LESS_THAN)
175-
checkFilterPredicate('_1 > "3", PredicateLeaf.Operator.LESS_THAN_EQUALS)
176-
checkFilterPredicate('_1 <= "1", PredicateLeaf.Operator.LESS_THAN_EQUALS)
177-
checkFilterPredicate('_1 >= "4", PredicateLeaf.Operator.LESS_THAN)
178-
179-
checkFilterPredicate(Literal("1") === '_1, PredicateLeaf.Operator.EQUALS)
180-
checkFilterPredicate(Literal("1") <=> '_1, PredicateLeaf.Operator.NULL_SAFE_EQUALS)
181-
checkFilterPredicate(Literal("2") > '_1, PredicateLeaf.Operator.LESS_THAN)
182-
checkFilterPredicate(Literal("3") < '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
183-
checkFilterPredicate(Literal("1") >= '_1, PredicateLeaf.Operator.LESS_THAN_EQUALS)
184-
checkFilterPredicate(Literal("4") <= '_1, PredicateLeaf.Operator.LESS_THAN)
185-
186-
checkFilterPredicate(!('_1 < "4"), PredicateLeaf.Operator.LESS_THAN)
213+
checkFilterPredicate('_1.isNull, isNullTmpl)
214+
checkFilterPredicate('_1.isNotNull, isNotNullTmpl)
215+
216+
checkFilterPredicate('_1 === "1", equalsTmpl.format("1"))
217+
checkFilterPredicate('_1 <=> "1", nullSafeEqualsTmpl.format("1"))
218+
checkFilterPredicate('_1 !== "1", notEqualsTmpl.format("1"))
219+
220+
checkFilterPredicate('_1 < "2", lessThenTmpl.format("2"))
221+
checkFilterPredicate('_1 > "3", greaterThenTmpl.format("3"))
222+
checkFilterPredicate('_1 <= "1", lessThenEqualsTmpl.format("1"))
223+
checkFilterPredicate('_1 >= "4", greaterThenEqualsTmpl.format("4"))
224+
225+
checkFilterPredicate(Literal("1") === '_1, equalsTmpl.format("1"))
226+
checkFilterPredicate(Literal("1") <=> '_1, nullSafeEqualsTmpl.format("1"))
227+
checkFilterPredicate(Literal("2") > '_1, lessThenTmpl.format("2"))
228+
checkFilterPredicate(Literal("3") < '_1, greaterThenTmpl.format("3"))
229+
checkFilterPredicate(Literal("1") >= '_1, lessThenEqualsTmpl.format("1"))
230+
checkFilterPredicate(Literal("4") <= '_1, greaterThenEqualsTmpl.format("4"))
231+
232+
checkFilterPredicate(!('_1 < "4"), notLessThen.format("4"))
233+
checkFilterPredicate('_1 < "2" || '_1 > "3", andLessThenGreaterThenTmpl.format("2", "3"))
234+
checkFilterPredicate('_1 < "2" && '_1 > "3", orLessThenGreaterThenTmpl.format("2", "3"))
187235
}
188236
}
189237

@@ -193,7 +241,8 @@ class OrcFilterSuite extends QueryTest with OrcTest {
193241
}
194242

195243
withOrcDataFrame((1 to 4).map(i => Tuple1(i.b))) { implicit df =>
196-
checkFilterPredicate('_1.isNull, PredicateLeaf.Operator.IS_NULL)
244+
checkFilterPredicate('_1.isNull, isNullTmpl)
245+
checkFilterPredicate('_1.isNotNull, isNotNullTmpl)
197246
}
198247
}
199248
}

0 commit comments

Comments
 (0)