Skip to content

Commit efd6d29

Browse files
jbimbertilooner
authored andcommitted
DRILL-5796 : implement ROWS_MATCH enum to keep inside rowgroup the filter result information, used to prune the filter if all rows match.
closes #1298
1 parent ee84164 commit efd6d29

17 files changed

Lines changed: 510 additions & 209 deletions

exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetBooleanPredicate.java

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,29 @@ private static <C extends Comparable<C>> LogicalExpression createAndPredicate(
4646
ExpressionPosition pos
4747
) {
4848
return new ParquetBooleanPredicate<C>(name, args, pos) {
49+
/**
50+
* Evaluates a compound "AND" filter on the statistics of a RowGroup (the filter reads "filterA and filterB").
51+
* Return value :<ul>
52+
* <li>ALL : only if all filters return ALL
53+
* <li>NONE : if one filter at least returns NONE
54+
* <li>SOME : all other cases
55+
* </ul>
56+
*/
4957
@Override
50-
public boolean canDrop(RangeExprEvaluator<C> evaluator) {
51-
// "and" : as long as one branch is OK to drop, we can drop it.
58+
public RowsMatch matches(RangeExprEvaluator<C> evaluator) {
59+
RowsMatch resultMatch = RowsMatch.ALL;
5260
for (LogicalExpression child : this) {
53-
if (child instanceof ParquetFilterPredicate && ((ParquetFilterPredicate)child).canDrop(evaluator)) {
54-
return true;
61+
if (child instanceof ParquetFilterPredicate) {
62+
switch (((ParquetFilterPredicate) child).matches(evaluator)) {
63+
case NONE:
64+
return RowsMatch.NONE; // No row comply to 1 filter part => can drop RG
65+
case SOME:
66+
resultMatch = RowsMatch.SOME;
67+
default: // Do nothing
68+
}
5569
}
5670
}
57-
return false;
71+
return resultMatch;
5872
}
5973
};
6074
}
@@ -66,15 +80,29 @@ private static <C extends Comparable<C>> LogicalExpression createOrPredicate(
6680
ExpressionPosition pos
6781
) {
6882
return new ParquetBooleanPredicate<C>(name, args, pos) {
83+
/**
84+
* Evaluates a compound "OR" filter on the statistics of a RowGroup (the filter reads "filterA or filterB").
85+
* Return value :<ul>
86+
* <li>NONE : only if all filters return NONE
87+
* <li>ALL : if one filter at least returns ALL
88+
* <li>SOME : all other cases
89+
* </ul>
90+
*/
6991
@Override
70-
public boolean canDrop(RangeExprEvaluator<C> evaluator) {
92+
public RowsMatch matches(RangeExprEvaluator<C> evaluator) {
93+
RowsMatch resultMatch = RowsMatch.NONE;
7194
for (LogicalExpression child : this) {
72-
// "or" : as long as one branch is NOT ok to drop, we can NOT drop it.
73-
if (!(child instanceof ParquetFilterPredicate) || !((ParquetFilterPredicate)child).canDrop(evaluator)) {
74-
return false;
95+
if (child instanceof ParquetFilterPredicate) {
96+
switch (((ParquetFilterPredicate) child).matches(evaluator)) {
97+
case ALL:
98+
return RowsMatch.ALL; // One at least is ALL => can drop filter but not RG
99+
case SOME:
100+
resultMatch = RowsMatch.SOME;
101+
default: // Do nothing
102+
}
75103
}
76104
}
77-
return true;
105+
return resultMatch;
78106
}
79107
};
80108
}

exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetComparisonPredicate.java

Lines changed: 43 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,9 @@
2626
import java.util.ArrayList;
2727
import java.util.Iterator;
2828
import java.util.List;
29-
import java.util.function.BiPredicate;
29+
import java.util.function.BiFunction;
3030

31+
import static org.apache.drill.exec.expr.stat.ParquetPredicatesHelper.hasNoNulls;
3132
import static org.apache.drill.exec.expr.stat.ParquetPredicatesHelper.isNullOrEmpty;
3233
import static org.apache.drill.exec.expr.stat.ParquetPredicatesHelper.isAllNulls;
3334

@@ -38,12 +39,13 @@ public class ParquetComparisonPredicate<C extends Comparable<C>> extends Logical
3839
implements ParquetFilterPredicate<C> {
3940
private final LogicalExpression left;
4041
private final LogicalExpression right;
41-
private final BiPredicate<Statistics<C>, Statistics<C>> predicate;
42+
43+
private final BiFunction<Statistics<C>, Statistics<C>, RowsMatch> predicate;
4244

4345
private ParquetComparisonPredicate(
4446
LogicalExpression left,
4547
LogicalExpression right,
46-
BiPredicate<Statistics<C>, Statistics<C>> predicate
48+
BiFunction<Statistics<C>, Statistics<C>, RowsMatch> predicate
4749
) {
4850
super(left.getPosition());
4951
this.left = left;
@@ -65,7 +67,7 @@ public <T, V, E extends Exception> T accept(ExprVisitor<T, V, E> visitor, V valu
6567
}
6668

6769
/**
68-
* Semantics of canDrop() is very similar to what is implemented in Parquet library's
70+
* Semantics of matches() is very similar to what is implemented in Parquet library's
6971
* {@link org.apache.parquet.filter2.statisticslevel.StatisticsFilter} and
7072
* {@link org.apache.parquet.filter2.predicate.FilterPredicate}
7173
*
@@ -83,23 +85,29 @@ public <T, V, E extends Exception> T accept(ExprVisitor<T, V, E> visitor, V valu
8385
* where Column1 and Column2 are from same parquet table.
8486
*/
8587
@Override
86-
public boolean canDrop(RangeExprEvaluator<C> evaluator) {
88+
public RowsMatch matches(RangeExprEvaluator<C> evaluator) {
8789
Statistics<C> leftStat = left.accept(evaluator, null);
8890
if (isNullOrEmpty(leftStat)) {
89-
return false;
91+
return RowsMatch.SOME;
9092
}
91-
9293
Statistics<C> rightStat = right.accept(evaluator, null);
9394
if (isNullOrEmpty(rightStat)) {
94-
return false;
95+
return RowsMatch.SOME;
9596
}
96-
97-
// if either side is ALL null, = is evaluated to UNKNOWN -> canDrop
9897
if (isAllNulls(leftStat, evaluator.getRowCount()) || isAllNulls(rightStat, evaluator.getRowCount())) {
99-
return true;
98+
return RowsMatch.NONE;
99+
}
100+
if (!leftStat.hasNonNullValue() || !rightStat.hasNonNullValue()) {
101+
return RowsMatch.SOME;
100102
}
103+
return predicate.apply(leftStat, rightStat);
104+
}
101105

102-
return (leftStat.hasNonNullValue() && rightStat.hasNonNullValue()) && predicate.test(leftStat, rightStat);
106+
/**
107+
* If one rowgroup contains some null values, change the RowsMatch.ALL into RowsMatch.SOME (null values should be discarded by filter)
108+
*/
109+
private static RowsMatch checkNull(Statistics leftStat, Statistics rightStat) {
110+
return !hasNoNulls(leftStat) || !hasNoNulls(rightStat) ? RowsMatch.SOME : RowsMatch.ALL;
103111
}
104112

105113
/**
@@ -109,12 +117,9 @@ private static <C extends Comparable<C>> LogicalExpression createEqualPredicate(
109117
LogicalExpression left,
110118
LogicalExpression right
111119
) {
112-
return new ParquetComparisonPredicate<C>(left, right, (leftStat, rightStat) -> {
113-
// can drop when left's max < right's min, or right's max < left's min
114-
final C leftMin = leftStat.genericGetMin();
115-
final C rightMin = rightStat.genericGetMin();
116-
return (leftStat.compareMaxToValue(rightMin) < 0) || (rightStat.compareMaxToValue(leftMin) < 0);
117-
}) {
120+
return new ParquetComparisonPredicate<C>(left, right, (leftStat, rightStat) ->
121+
leftStat.compareMaxToValue(rightStat.genericGetMin()) < 0 || rightStat.compareMaxToValue(leftStat.genericGetMin()) < 0 ? RowsMatch.NONE : RowsMatch.SOME
122+
) {
118123
@Override
119124
public String toString() {
120125
return left + " = " + right;
@@ -130,9 +135,10 @@ private static <C extends Comparable<C>> LogicalExpression createGTPredicate(
130135
LogicalExpression right
131136
) {
132137
return new ParquetComparisonPredicate<C>(left, right, (leftStat, rightStat) -> {
133-
// can drop when left's max <= right's min.
134-
final C rightMin = rightStat.genericGetMin();
135-
return leftStat.compareMaxToValue(rightMin) <= 0;
138+
if (leftStat.compareMaxToValue(rightStat.genericGetMin()) <= 0) {
139+
return RowsMatch.NONE;
140+
}
141+
return leftStat.compareMinToValue(rightStat.genericGetMax()) > 0 ? checkNull(leftStat, rightStat) : RowsMatch.SOME;
136142
});
137143
}
138144

@@ -144,9 +150,10 @@ private static <C extends Comparable<C>> LogicalExpression createGEPredicate(
144150
LogicalExpression right
145151
) {
146152
return new ParquetComparisonPredicate<C>(left, right, (leftStat, rightStat) -> {
147-
// can drop when left's max < right's min.
148-
final C rightMin = rightStat.genericGetMin();
149-
return leftStat.compareMaxToValue(rightMin) < 0;
153+
if (leftStat.compareMaxToValue(rightStat.genericGetMin()) < 0) {
154+
return RowsMatch.NONE;
155+
}
156+
return leftStat.compareMinToValue(rightStat.genericGetMax()) >= 0 ? checkNull(leftStat, rightStat) : RowsMatch.SOME;
150157
});
151158
}
152159

@@ -158,9 +165,10 @@ private static <C extends Comparable<C>> LogicalExpression createLTPredicate(
158165
LogicalExpression right
159166
) {
160167
return new ParquetComparisonPredicate<C>(left, right, (leftStat, rightStat) -> {
161-
// can drop when right's max <= left's min.
162-
final C leftMin = leftStat.genericGetMin();
163-
return rightStat.compareMaxToValue(leftMin) <= 0;
168+
if (rightStat.compareMaxToValue(leftStat.genericGetMin()) <= 0) {
169+
return RowsMatch.NONE;
170+
}
171+
return leftStat.compareMaxToValue(rightStat.genericGetMin()) < 0 ? checkNull(leftStat, rightStat) : RowsMatch.SOME;
164172
});
165173
}
166174

@@ -171,9 +179,10 @@ private static <C extends Comparable<C>> LogicalExpression createLEPredicate(
171179
LogicalExpression left, LogicalExpression right
172180
) {
173181
return new ParquetComparisonPredicate<C>(left, right, (leftStat, rightStat) -> {
174-
// can drop when right's max < left's min.
175-
final C leftMin = leftStat.genericGetMin();
176-
return rightStat.compareMaxToValue(leftMin) < 0;
182+
if (rightStat.compareMaxToValue(leftStat.genericGetMin()) < 0) {
183+
return RowsMatch.NONE;
184+
}
185+
return leftStat.compareMaxToValue(rightStat.genericGetMin()) <= 0 ? checkNull(leftStat, rightStat) : RowsMatch.SOME;
177186
});
178187
}
179188

@@ -185,11 +194,10 @@ private static <C extends Comparable<C>> LogicalExpression createNEPredicate(
185194
LogicalExpression right
186195
) {
187196
return new ParquetComparisonPredicate<C>(left, right, (leftStat, rightStat) -> {
188-
// can drop when there is only one unique value.
189-
final C leftMax = leftStat.genericGetMax();
190-
final C rightMax = rightStat.genericGetMax();
191-
return leftStat.compareMinToValue(leftMax) == 0 && rightStat.compareMinToValue(rightMax) == 0 &&
192-
leftStat.compareMaxToValue(rightMax) == 0;
197+
if (leftStat.compareMaxToValue(rightStat.genericGetMin()) < 0 || rightStat.compareMaxToValue(leftStat.genericGetMin()) < 0) {
198+
return checkNull(leftStat, rightStat);
199+
}
200+
return leftStat.compareMaxToValue(rightStat.genericGetMax()) == 0 && leftStat.compareMinToValue(rightStat.genericGetMin()) == 0 ? RowsMatch.NONE : RowsMatch.SOME;
193201
});
194202
}
195203

exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetFilterPredicate.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,5 +18,16 @@
1818
package org.apache.drill.exec.expr.stat;
1919

2020
public interface ParquetFilterPredicate<T extends Comparable<T>> {
21-
boolean canDrop(RangeExprEvaluator<T> evaluator);
21+
22+
/**
23+
* Define the validity of a row group against a filter
24+
* <ul>
25+
* <li>ALL : all rows match the filter (can not drop the row group and can prune the filter)
26+
* <li>NONE : no row matches the filter (can drop the row group)
27+
* <li>SOME : some rows only match the filter or the filter can not be applied (can not drop the row group nor the filter)
28+
* </ul>
29+
*/
30+
enum RowsMatch {ALL, NONE, SOME}
31+
32+
RowsMatch matches(RangeExprEvaluator<T> evaluator);
2233
}

0 commit comments

Comments
 (0)