@@ -66,7 +66,9 @@ private[sql] object OrcFilters extends OrcFiltersBase {
6666 for {
6767 // Combines all convertible filters using `And` to produce a single conjunction
6868 conjunction <- buildTree(convertibleFilters(schema, dataTypeMap, filters))
69- // Then tries to build a single ORC `SearchArgument` for the conjunction predicate
69+ // Then tries to build a single ORC `SearchArgument` for the conjunction predicate.
70+ // The input predicate is fully convertible. There should not be any empty result in the
71+ // following recursive method call `buildSearchArgument`.
7072 builder <- buildSearchArgument(dataTypeMap, conjunction, newBuilder)
7173 } yield builder.build()
7274 }
@@ -80,6 +82,17 @@ private[sql] object OrcFilters extends OrcFiltersBase {
8082 def convertibleFiltersHelper (
8183 filter : Filter ,
8284 canPartialPushDown : Boolean ): Option [Filter ] = filter match {
85+ // At here, it is not safe to just convert one side and remove the other side
86+ // if we do not understand what the parent filters are.
87+ //
88+ // Here is an example used to explain the reason.
89+ // Let's say we have NOT(a = 2 AND b in ('1')) and we do not understand how to
90+ // convert b in ('1'). If we only convert a = 2, we will end up with a filter
91+ // NOT(a = 2), which will generate wrong results.
92+ //
93+ // Pushing one side of AND down is only safe to do at the top level or in the child
94+ // AND before hitting NOT or OR conditions, and in this case, the unsupported predicate
95+ // can be safely removed.
8396 case And (left, right) =>
8497 val leftResultOptional = convertibleFiltersHelper(left, canPartialPushDown)
8598 val rightResultOptional = convertibleFiltersHelper(right, canPartialPushDown)
@@ -90,6 +103,17 @@ private[sql] object OrcFilters extends OrcFiltersBase {
90103 case _ => None
91104 }
92105
106+ // The Or predicate is convertible when both of its children can be pushed down.
107+ // That is to say, if one/both of the children can be partially pushed down, the Or
108+ // predicate can be partially pushed down as well.
109+ //
110+ // Here is an example used to explain the reason.
111+ // Let's say we have
112+ // (a1 AND a2) OR (b1 AND b2),
113+ // a1 and b1 is convertible, while a2 and b2 is not.
114+ // The predicate can be converted as
115+ // (a1 OR b1) AND (a1 OR b2) AND (a2 OR b1) AND (a2 OR b2)
116+ // As per the logical in And predicate, we can push down (a1 OR b1).
93117 case Or (left, right) =>
94118 val leftResultOptional = convertibleFiltersHelper(left, canPartialPushDown)
95119 val rightResultOptional = convertibleFiltersHelper(right, canPartialPushDown)
@@ -150,87 +174,40 @@ private[sql] object OrcFilters extends OrcFiltersBase {
150174 dataTypeMap : Map [String , DataType ],
151175 expression : Filter ,
152176 builder : Builder ): Option [Builder ] = {
153- createBuilder(dataTypeMap, expression, builder, canPartialPushDownConjuncts = true )
177+ createBuilder(dataTypeMap, expression, builder)
154178 }
155179
156180 /**
157181 * @param dataTypeMap a map from the attribute name to its data type.
158182 * @param expression the input filter predicates.
159183 * @param builder the input SearchArgument.Builder.
160- * @param canPartialPushDownConjuncts whether a subset of conjuncts of predicates can be pushed
161- * down safely. Pushing ONLY one side of AND down is safe to
162- * do at the top level or none of its ancestors is NOT and OR.
163184 * @return the builder so far.
164185 */
165186 private def createBuilder (
166187 dataTypeMap : Map [String , DataType ],
167188 expression : Filter ,
168- builder : Builder ,
169- canPartialPushDownConjuncts : Boolean ): Option [Builder ] = {
189+ builder : Builder ): Option [Builder ] = {
170190 def getType (attribute : String ): PredicateLeaf .Type =
171191 getPredicateLeafType(dataTypeMap(attribute))
172192
173193 import org .apache .spark .sql .sources ._
174194
175195 expression match {
176196 case And (left, right) =>
177- // At here, it is not safe to just convert one side and remove the other side
178- // if we do not understand what the parent filters are.
179- //
180- // Here is an example used to explain the reason.
181- // Let's say we have NOT(a = 2 AND b in ('1')) and we do not understand how to
182- // convert b in ('1'). If we only convert a = 2, we will end up with a filter
183- // NOT(a = 2), which will generate wrong results.
184- //
185- // Pushing one side of AND down is only safe to do at the top level or in the child
186- // AND before hitting NOT or OR conditions, and in this case, the unsupported predicate
187- // can be safely removed.
188- val leftBuilderOption =
189- createBuilder(dataTypeMap, left, newBuilder, canPartialPushDownConjuncts)
190- val rightBuilderOption =
191- createBuilder(dataTypeMap, right, newBuilder, canPartialPushDownConjuncts)
192- (leftBuilderOption, rightBuilderOption) match {
193- case (Some (_), Some (_)) =>
194- for {
195- lhs <- createBuilder(dataTypeMap, left,
196- builder.startAnd(), canPartialPushDownConjuncts)
197- rhs <- createBuilder(dataTypeMap, right, lhs, canPartialPushDownConjuncts)
198- } yield rhs.end()
199-
200- case (Some (_), None ) if canPartialPushDownConjuncts =>
201- createBuilder(dataTypeMap, left, builder, canPartialPushDownConjuncts)
202-
203- case (None , Some (_)) if canPartialPushDownConjuncts =>
204- createBuilder(dataTypeMap, right, builder, canPartialPushDownConjuncts)
205-
206- case _ => None
207- }
197+ for {
198+ lhs <- createBuilder(dataTypeMap, left, builder.startAnd())
199+ rhs <- createBuilder(dataTypeMap, right, lhs)
200+ } yield rhs.end()
208201
209202 case Or (left, right) =>
210- // The Or predicate is convertible when both of its children can be pushed down.
211- // That is to say, if one/both of the children can be partially pushed down, the Or
212- // predicate can be partially pushed down as well.
213- //
214- // Here is an example used to explain the reason.
215- // Let's say we have
216- // (a1 AND a2) OR (b1 AND b2),
217- // a1 and b1 is convertible, while a2 and b2 is not.
218- // The predicate can be converted as
219- // (a1 OR b1) AND (a1 OR b2) AND (a2 OR b1) AND (a2 OR b2)
220- // As per the logical in And predicate, we can push down (a1 OR b1).
221203 for {
222- _ <- createBuilder(dataTypeMap, left, newBuilder, canPartialPushDownConjuncts)
223- _ <- createBuilder(dataTypeMap, right, newBuilder, canPartialPushDownConjuncts)
224- lhs <- createBuilder(dataTypeMap, left,
225- builder.startOr(), canPartialPushDownConjuncts)
226- rhs <- createBuilder(dataTypeMap, right, lhs, canPartialPushDownConjuncts)
204+ lhs <- createBuilder(dataTypeMap, left, builder.startOr())
205+ rhs <- createBuilder(dataTypeMap, right, lhs)
227206 } yield rhs.end()
228207
229208 case Not (child) =>
230209 for {
231- _ <- createBuilder(dataTypeMap, child, newBuilder, canPartialPushDownConjuncts = false )
232- negate <- createBuilder(dataTypeMap,
233- child, builder.startNot(), canPartialPushDownConjuncts = false )
210+ negate <- createBuilder(dataTypeMap, child, builder.startNot())
234211 } yield negate.end()
235212
236213 // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()`
0 commit comments