2626import java .util .ArrayList ;
2727import java .util .Iterator ;
2828import java .util .List ;
29- import java .util .function .BiPredicate ;
29+ import java .util .function .BiFunction ;
3030
31+ import static org .apache .drill .exec .expr .stat .ParquetPredicatesHelper .hasNoNulls ;
3132import static org .apache .drill .exec .expr .stat .ParquetPredicatesHelper .isNullOrEmpty ;
3233import static org .apache .drill .exec .expr .stat .ParquetPredicatesHelper .isAllNulls ;
3334
@@ -38,12 +39,13 @@ public class ParquetComparisonPredicate<C extends Comparable<C>> extends Logical
3839 implements ParquetFilterPredicate <C > {
3940 private final LogicalExpression left ;
4041 private final LogicalExpression right ;
41- private final BiPredicate <Statistics <C >, Statistics <C >> predicate ;
42+
43+ private final BiFunction <Statistics <C >, Statistics <C >, RowsMatch > predicate ;
4244
4345 private ParquetComparisonPredicate (
4446 LogicalExpression left ,
4547 LogicalExpression right ,
46- BiPredicate <Statistics <C >, Statistics <C >> predicate
48+ BiFunction <Statistics <C >, Statistics <C >, RowsMatch > predicate
4749 ) {
4850 super (left .getPosition ());
4951 this .left = left ;
@@ -65,7 +67,7 @@ public <T, V, E extends Exception> T accept(ExprVisitor<T, V, E> visitor, V valu
6567 }
6668
6769 /**
68- * Semantics of canDrop () is very similar to what is implemented in Parquet library's
70+ * Semantics of matches () is very similar to what is implemented in Parquet library's
6971 * {@link org.apache.parquet.filter2.statisticslevel.StatisticsFilter} and
7072 * {@link org.apache.parquet.filter2.predicate.FilterPredicate}
7173 *
@@ -83,23 +85,29 @@ public <T, V, E extends Exception> T accept(ExprVisitor<T, V, E> visitor, V valu
8385 * where Column1 and Column2 are from same parquet table.
8486 */
8587 @ Override
86- public boolean canDrop (RangeExprEvaluator <C > evaluator ) {
88+ public RowsMatch matches (RangeExprEvaluator <C > evaluator ) {
8789 Statistics <C > leftStat = left .accept (evaluator , null );
8890 if (isNullOrEmpty (leftStat )) {
89- return false ;
91+ return RowsMatch . SOME ;
9092 }
91-
9293 Statistics <C > rightStat = right .accept (evaluator , null );
9394 if (isNullOrEmpty (rightStat )) {
94- return false ;
95+ return RowsMatch . SOME ;
9596 }
96-
97- // if either side is ALL null, = is evaluated to UNKNOWN -> canDrop
9897 if (isAllNulls (leftStat , evaluator .getRowCount ()) || isAllNulls (rightStat , evaluator .getRowCount ())) {
99- return true ;
98+ return RowsMatch .NONE ;
99+ }
100+ if (!leftStat .hasNonNullValue () || !rightStat .hasNonNullValue ()) {
101+ return RowsMatch .SOME ;
100102 }
103+ return predicate .apply (leftStat , rightStat );
104+ }
101105
102- return (leftStat .hasNonNullValue () && rightStat .hasNonNullValue ()) && predicate .test (leftStat , rightStat );
106+ /**
107+ * If one rowgroup contains some null values, change the RowsMatch.ALL into RowsMatch.SOME (null values should be discarded by filter)
108+ */
109+ private static RowsMatch checkNull (Statistics leftStat , Statistics rightStat ) {
110+ return !hasNoNulls (leftStat ) || !hasNoNulls (rightStat ) ? RowsMatch .SOME : RowsMatch .ALL ;
103111 }
104112
105113 /**
@@ -109,12 +117,9 @@ private static <C extends Comparable<C>> LogicalExpression createEqualPredicate(
109117 LogicalExpression left ,
110118 LogicalExpression right
111119 ) {
112- return new ParquetComparisonPredicate <C >(left , right , (leftStat , rightStat ) -> {
113- // can drop when left's max < right's min, or right's max < left's min
114- final C leftMin = leftStat .genericGetMin ();
115- final C rightMin = rightStat .genericGetMin ();
116- return (leftStat .compareMaxToValue (rightMin ) < 0 ) || (rightStat .compareMaxToValue (leftMin ) < 0 );
117- }) {
120+ return new ParquetComparisonPredicate <C >(left , right , (leftStat , rightStat ) ->
121+ leftStat .compareMaxToValue (rightStat .genericGetMin ()) < 0 || rightStat .compareMaxToValue (leftStat .genericGetMin ()) < 0 ? RowsMatch .NONE : RowsMatch .SOME
122+ ) {
118123 @ Override
119124 public String toString () {
120125 return left + " = " + right ;
@@ -130,9 +135,10 @@ private static <C extends Comparable<C>> LogicalExpression createGTPredicate(
130135 LogicalExpression right
131136 ) {
return new ParquetComparisonPredicate<C>(left, right, (leftStat, rightStat) -> {
  // No row can satisfy left > right when left's max <= right's min.
  final boolean neverGreater = leftStat.compareMaxToValue(rightStat.genericGetMin()) <= 0;
  if (neverGreater) {
    return RowsMatch.NONE;
  }
  // Every non-null pair satisfies left > right when left's min > right's max;
  // checkNull then decides between ALL and SOME.
  final boolean alwaysGreater = leftStat.compareMinToValue(rightStat.genericGetMax()) > 0;
  if (!alwaysGreater) {
    return RowsMatch.SOME;
  }
  return checkNull(leftStat, rightStat);
});
137143 }
138144
@@ -144,9 +150,10 @@ private static <C extends Comparable<C>> LogicalExpression createGEPredicate(
144150 LogicalExpression right
145151 ) {
return new ParquetComparisonPredicate<C>(left, right, (leftStat, rightStat) -> {
  // No row can satisfy left >= right when left's max < right's min.
  final boolean neverGreaterOrEqual = leftStat.compareMaxToValue(rightStat.genericGetMin()) < 0;
  if (neverGreaterOrEqual) {
    return RowsMatch.NONE;
  }
  // Every non-null pair satisfies left >= right when left's min >= right's max;
  // checkNull then decides between ALL and SOME.
  final boolean alwaysGreaterOrEqual = leftStat.compareMinToValue(rightStat.genericGetMax()) >= 0;
  if (!alwaysGreaterOrEqual) {
    return RowsMatch.SOME;
  }
  return checkNull(leftStat, rightStat);
});
151158 }
152159
@@ -158,9 +165,10 @@ private static <C extends Comparable<C>> LogicalExpression createLTPredicate(
158165 LogicalExpression right
159166 ) {
return new ParquetComparisonPredicate<C>(left, right, (leftStat, rightStat) -> {
  // No row can satisfy left < right when right's max <= left's min.
  final boolean neverLess = rightStat.compareMaxToValue(leftStat.genericGetMin()) <= 0;
  if (neverLess) {
    return RowsMatch.NONE;
  }
  // Every non-null pair satisfies left < right when left's max < right's min;
  // checkNull then decides between ALL and SOME.
  final boolean alwaysLess = leftStat.compareMaxToValue(rightStat.genericGetMin()) < 0;
  if (!alwaysLess) {
    return RowsMatch.SOME;
  }
  return checkNull(leftStat, rightStat);
});
165173 }
166174
@@ -171,9 +179,10 @@ private static <C extends Comparable<C>> LogicalExpression createLEPredicate(
171179 LogicalExpression left , LogicalExpression right
172180 ) {
return new ParquetComparisonPredicate<C>(left, right, (leftStat, rightStat) -> {
  // No row can satisfy left <= right when right's max < left's min.
  final boolean neverLessOrEqual = rightStat.compareMaxToValue(leftStat.genericGetMin()) < 0;
  if (neverLessOrEqual) {
    return RowsMatch.NONE;
  }
  // Every non-null pair satisfies left <= right when left's max <= right's min;
  // checkNull then decides between ALL and SOME.
  final boolean alwaysLessOrEqual = leftStat.compareMaxToValue(rightStat.genericGetMin()) <= 0;
  if (!alwaysLessOrEqual) {
    return RowsMatch.SOME;
  }
  return checkNull(leftStat, rightStat);
});
178187 }
179188
@@ -185,11 +194,10 @@ private static <C extends Comparable<C>> LogicalExpression createNEPredicate(
185194 LogicalExpression right
186195 ) {
return new ParquetComparisonPredicate<C>(left, right, (leftStat, rightStat) -> {
  // Disjoint ranges (left's max < right's min, or right's max < left's min): every pair of
  // non-null values differs, so only nulls can keep a row from matching.
  if (leftStat.compareMaxToValue(rightStat.genericGetMin()) < 0 || rightStat.compareMaxToValue(leftStat.genericGetMin()) < 0) {
    return checkNull(leftStat, rightStat);
  }
  // Overlapping ranges: no row can match only when both sides hold one and the same value,
  // i.e. min == max on each side and those values are equal. Equal bounds alone are not
  // sufficient: two columns that both span [1, 5] can still differ row by row, so returning
  // NONE in that case would wrongly prune row groups that contain matching rows.
  final boolean leftSingleValue = leftStat.compareMinToValue(leftStat.genericGetMax()) == 0;
  final boolean rightSingleValue = rightStat.compareMinToValue(rightStat.genericGetMax()) == 0;
  final boolean sameValue = leftStat.compareMaxToValue(rightStat.genericGetMax()) == 0;
  return leftSingleValue && rightSingleValue && sameValue ? RowsMatch.NONE : RowsMatch.SOME;
});
194202 }
195203
0 commit comments